diff --git a/.appveyor.yml b/.appveyor.yml
index 149a8a3..85445d4 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -19,7 +19,7 @@
     if ($env:PLATFORM -eq "x64") { $env:PYTHON = "$env:PYTHON-x64" }
     $env:PATH = "C:\Python$env:PYTHON\;C:\Python$env:PYTHON\Scripts\;$env:PATH"
     python -W ignore -m pip install --upgrade pip wheel
-    python -W ignore -m pip install pytest numpy --no-warn-script-location
+    python -W ignore -m pip install pytest numpy --no-warn-script-location pytest-timeout
 - ps: |
     Start-FileDownload 'https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.zip'
     7z x eigen-3.3.7.zip -y > $null
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..8700fca
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,21 @@
+---
+# See all possible options and defaults with:
+# clang-format --style=llvm --dump-config
+BasedOnStyle: LLVM
+AccessModifierOffset: -4
+AlignConsecutiveAssignments: true
+AlwaysBreakTemplateDeclarations: Yes
+BinPackArguments: false
+BinPackParameters: false
+BreakBeforeBinaryOperators: All
+BreakConstructorInitializers: BeforeColon
+ColumnLimit: 99
+IndentCaseLabels: true
+IndentPPDirectives: AfterHash
+IndentWidth: 4
+Language: Cpp
+SpaceAfterCStyleCast: true
+# SpaceInEmptyBlock: true # too new
+Standard: Cpp11
+TabWidth: 4
+...
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 4ced21b..08d9e7c 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -126,7 +126,8 @@
 `--target` can be spelled `-t` in CMake 3.15+. You can also run individual
 tests with these targets:
 
-* `pytest`: Python tests only
+* `pytest`: Python tests only, using the
+[pytest](https://docs.pytest.org/en/stable/) framework
 * `cpptest`: C++ tests only
 * `test_cmake_build`: Install / subdirectory tests
 
@@ -134,6 +135,17 @@
 `-DPYBIND11_TEST_OVERRIDE="test_callbacks.cpp;test_pickling.cpp"`. If this is
 empty, all tests will be built.
 
+You may also pass flags to the `pytest` target by editing `tests/pytest.ini` or
+by using the `PYTEST_ADDOPTS` environment variable
+(see [`pytest` docs](https://docs.pytest.org/en/2.7.3/customize.html#adding-default-options)). As an example:
+
+```bash
+env PYTEST_ADDOPTS="--capture=no --exitfirst" \
+    cmake --build build --target pytest
+# Or using abbreviated flags
+env PYTEST_ADDOPTS="-s -x" cmake --build build --target pytest
+```
+
 ### Formatting
 
 All formatting is handled by pre-commit.
@@ -164,10 +176,35 @@
 pre-commit install
 ```
 
+### Clang-Format
+
+As of v2.6.2, pybind11 ships with a [`clang-format`][clang-format]
+configuration file at the top level of the repo (the filename is
+`.clang-format`). Currently, formatting is NOT applied automatically, but
+manually using `clang-format` for newly developed files is highly encouraged.
+To check if a file needs formatting:
+
+```bash
+clang-format -style=file --dry-run some.cpp
+```
+
+The output will show things to be fixed, if any. To actually format the file:
+
+```bash
+clang-format -style=file -i some.cpp
+```
+
+Note that the `-style=file` option searches the parent directories for the
+`.clang-format` file, i.e. the commands above can be run in any subdirectory
+of the pybind11 repo.
+
 ### Clang-Tidy
 
-To run Clang tidy, the following recipe should work. Files will be modified in
-place, so you can use git to monitor the changes.
+[`clang-tidy`][clang-tidy] performs deeper static code analyses and is
+more complex to run, compared to `clang-format`, but support for `clang-tidy`
+is built into the pybind11 CMake configuration. To run `clang-tidy`, the
+following recipe should work. Files will be modified in place, so you can
+use git to monitor the changes.
 
 ```bash
 docker run --rm -v $PWD:/pybind11 -it silkeh/clang:10
@@ -186,7 +223,7 @@
 cmake --build build
 ```
 
-The report is sent to stderr; you can pip it into a file if you wish.
+The report is sent to stderr; you can pipe it into a file if you wish.
 
 ### Build recipes
 
@@ -313,6 +350,8 @@
 
 
 [pre-commit]: https://pre-commit.com
+[clang-format]: https://clang.llvm.org/docs/ClangFormat.html
+[clang-tidy]: https://clang.llvm.org/extra/clang-tidy/
 [pybind11.readthedocs.org]: http://pybind11.readthedocs.org/en/latest
 [issue tracker]: https://github.com/pybind/pybind11/issues
 [gitter]: https://gitter.im/pybind/Lobby
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index c1eac3c..7327336 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -6,6 +6,11 @@
     schedule:
       interval: "daily"
     ignore:
-      # Offical actions have moving tags like v1
+      # Official actions have moving tags like v1
       # that are used, so they don't need updates here
-      - dependency-name: "actions/*"
+      - dependency-name: "actions/checkout"
+      - dependency-name: "actions/setup-python"
+      - dependency-name: "actions/cache"
+      - dependency-name: "actions/upload-artifact"
+      - dependency-name: "actions/download-artifact"
+      - dependency-name: "actions/labeler"
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 5570f6f..97a6ff7 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -5,7 +5,8 @@
 
 ## Suggested changelog entry:
 
-<!-- fill in the below block with the expected RestructuredText entry (delete if no entry needed) -->
+<!-- Fill in the below block with the expected reStructuredText entry. Delete if no entry needed;
+     but do not delete header or rst block if an entry is needed! Will be collected via a script. -->
 
 ```rst
 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 73424f9..f90c199 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,10 +21,8 @@
         - 2.7
         - 3.5
         - 3.6
-        - 3.7
-        - 3.8
         - 3.9
-        # - 3.10.0-alpha.1 - need next release for pybind11 fix
+        # - 3.10-dev  # Re-enable once 3.10.0a5 is released
         - pypy2
         - pypy3
 
@@ -44,10 +42,6 @@
             python: 3.6
             args: >
               -DPYBIND11_FINDPYTHON=ON
-          - runs-on: ubuntu-latest
-            python: 3.8
-            args: >
-              -DPYBIND11_FINDPYTHON=ON
 
         # These items will be removed from the build matrix, keys must match.
         exclude:
@@ -57,11 +51,9 @@
           - runs-on: windows-latest
             python: pypy3
 
-          # Let's drop a few macOS runs since that tends to be 2.7 or 3.8+
-          - runs-on: macos-latest
-            python: 3.6
-          - runs-on: macos-latest
-            python: 3.7
+          # TODO: PyPy2 7.3.3 segfaults, while 7.3.2 was fine.
+          - runs-on: ubuntu-latest
+            python: pypy2
 
     name: "🐍 ${{ matrix.python }} • ${{ matrix.runs-on }} • x64 ${{ matrix.args }}"
     runs-on: ${{ matrix.runs-on }}
@@ -79,7 +71,7 @@
       run: echo "BOOST_ROOT=$BOOST_ROOT_1_72_0" >> $GITHUB_ENV
 
     - name: Update CMake
-      uses: jwlawson/actions-setup-cmake@v1.4
+      uses: jwlawson/actions-setup-cmake@v1.7
 
     - name: Cache wheels
       if: runner.os == 'macOS'
@@ -117,7 +109,7 @@
 
     - name: C++11 tests
       # TODO: Figure out how to load the DLL on Python 3.8+
-      if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9))"
+      if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10-dev'))"
       run: cmake --build .  --target cpptest -j 2
 
     - name: Interface test C++11
@@ -145,7 +137,7 @@
 
     - name: C++ tests
       # TODO: Figure out how to load the DLL on Python 3.8+
-      if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9))"
+      if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10-dev'))"
       run: cmake --build build2 --target cpptest
 
     - name: Interface test
@@ -173,6 +165,84 @@
       run: pytest tests/extra_setuptools
 
 
+  deadsnakes:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+        - python-version: 3.9
+          python-debug: true
+          valgrind: true
+        - python-version: 3.10-dev
+          python-debug: false
+
+    name: "🐍 ${{ matrix.python-version }}${{ matrix.python-debug && '-dbg' || '' }} (deadsnakes)${{ matrix.valgrind && ' • Valgrind' || '' }} • x64"
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Setup Python ${{ matrix.python-version }} (deadsnakes)
+      uses: deadsnakes/action@v2.1.1
+      with:
+        python-version: ${{ matrix.python-version }}
+        debug: ${{ matrix.python-debug }}
+
+    - name: Update CMake
+      uses: jwlawson/actions-setup-cmake@v1.7
+
+    - name: Valgrind cache
+      if: matrix.valgrind
+      uses: actions/cache@v2
+      id: cache-valgrind
+      with:
+        path: valgrind
+        key: 3.16.1 # Valgrind version
+
+    - name: Compile Valgrind
+      if: matrix.valgrind && steps.cache-valgrind.outputs.cache-hit != 'true'
+      run: |
+        VALGRIND_VERSION=3.16.1
+        curl https://sourceware.org/pub/valgrind/valgrind-$VALGRIND_VERSION.tar.bz2 -o - | tar xj
+        mv valgrind-$VALGRIND_VERSION valgrind
+        cd valgrind
+        ./configure
+        make -j 2 > /dev/null
+
+    - name: Install Valgrind
+      if: matrix.valgrind
+      working-directory: valgrind
+      run: |
+        sudo make install
+        sudo apt-get update
+        sudo apt-get install libc6-dbg  # Needed by Valgrind
+
+    - name: Prepare env
+      run: python -m pip install -r tests/requirements.txt --prefer-binary
+
+    - name: Configure
+      run: >
+        cmake -S . -B build
+        -DCMAKE_BUILD_TYPE=Debug
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+        -DCMAKE_CXX_STANDARD=17
+
+    - name: Build
+      run: cmake --build build -j 2
+
+    - name: Python tests
+      run: cmake --build build --target pytest
+
+    - name: C++ tests
+      run: cmake --build build --target cpptest
+
+    - name: Run Valgrind on Python tests
+      if: matrix.valgrind
+      run: cmake --build build --target memcheck
+
+
   # Testing on clang using the excellent silkeh clang docker images
   clang:
     runs-on: ubuntu-latest
@@ -250,44 +320,48 @@
       run: cmake --build build --target pytest
 
 
-  # Testing CentOS 8 + PGI compilers
-  centos-nvhpc8:
-    runs-on: ubuntu-latest
-    name: "🐍 3 • CentOS8 / PGI 20.7 • x64"
-    container: centos:8
-
-    steps:
-    - uses: actions/checkout@v2
-
-    - name: Add Python 3 and a few requirements
-      run: yum update -y && yum install -y git python3-devel python3-numpy python3-pytest make environment-modules
-
-    - name: Install CMake with pip
-      run: |
-        python3 -m pip install --upgrade pip
-        python3 -m pip install cmake --prefer-binary
-
-    - name: Install NVidia HPC SDK
-      run: yum -y install https://developer.download.nvidia.com/hpc-sdk/nvhpc-20-7-20.7-1.x86_64.rpm https://developer.download.nvidia.com/hpc-sdk/nvhpc-2020-20.7-1.x86_64.rpm
-
-    - name: Configure
-      shell: bash
-      run: |
-        source /etc/profile.d/modules.sh
-        module load /opt/nvidia/hpc_sdk/modulefiles/nvhpc/20.7
-        cmake -S . -B build -DDOWNLOAD_CATCH=ON -DCMAKE_CXX_STANDARD=14 -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
-
-    - name: Build
-      run: cmake --build build -j 2 --verbose
-
-    - name: Python tests
-      run: cmake --build build --target pytest
-
-    - name: C++ tests
-      run: cmake --build build --target cpptest
-
-    - name: Interface test
-      run: cmake --build build --target test_cmake_build
+# TODO: Internal compiler error - report to NVidia
+#  # Testing CentOS 8 + PGI compilers
+#  centos-nvhpc8:
+#    runs-on: ubuntu-latest
+#    name: "🐍 3 • CentOS8 / PGI 20.11 • x64"
+#    container: centos:8
+#
+#    steps:
+#    - uses: actions/checkout@v2
+#
+#    - name: Add Python 3 and a few requirements
+#      run: yum update -y && yum install -y git python3-devel python3-numpy python3-pytest make environment-modules
+#
+#    - name: Install CMake with pip
+#      run: |
+#        python3 -m pip install --upgrade pip
+#        python3 -m pip install cmake --prefer-binary
+#
+#    - name: Install NVidia HPC SDK
+#      run: >
+#        yum -y install
+#        https://developer.download.nvidia.com/hpc-sdk/20.11/nvhpc-20-11-20.11-1.x86_64.rpm
+#        https://developer.download.nvidia.com/hpc-sdk/20.11/nvhpc-2020-20.11-1.x86_64.rpm
+#
+#    - name: Configure
+#      shell: bash
+#      run: |
+#        source /etc/profile.d/modules.sh
+#        module load /opt/nvidia/hpc_sdk/modulefiles/nvhpc/20.11
+#        cmake -S . -B build -DDOWNLOAD_CATCH=ON -DCMAKE_CXX_STANDARD=14 -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
+#
+#    - name: Build
+#      run: cmake --build build -j 2 --verbose
+#
+#    - name: Python tests
+#      run: cmake --build build --target pytest
+#
+#    - name: C++ tests
+#      run: cmake --build build --target cpptest
+#
+#    - name: Interface test
+#      run: cmake --build build --target test_cmake_build
 
 
   # Testing on CentOS 7 + PGI compilers, which seems to require more workarounds
@@ -338,6 +412,7 @@
     - name: Interface test
       run: cmake3 --build build --target test_cmake_build
 
+
   # Testing on GCC using the GCC docker images (only recent images supported)
   gcc:
     runs-on: ubuntu-latest
@@ -365,10 +440,8 @@
     - name: Update pip
       run: python3 -m pip install --upgrade pip
 
-    - name: Setup CMake 3.18
-      uses: jwlawson/actions-setup-cmake@v1.4
-      with:
-        cmake-version: 3.18
+    - name: Update CMake
+      uses: jwlawson/actions-setup-cmake@v1.7
 
     - name: Configure
       shell: bash
@@ -392,6 +465,103 @@
       run: cmake --build build --target test_cmake_build
 
 
+  # Testing on ICC using the oneAPI apt repo
+  icc:
+    runs-on: ubuntu-20.04
+    strategy:
+      fail-fast: false
+
+    name: "🐍 3 • ICC latest • x64"
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Add apt repo
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y wget build-essential pkg-config cmake ca-certificates gnupg
+        wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
+        sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
+        echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
+
+    - name: Add ICC & Python 3
+      run: sudo apt-get update; sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic cmake python3-dev python3-numpy python3-pytest python3-pip
+
+    - name: Update pip
+      run: |
+        set +e; source /opt/intel/oneapi/setvars.sh; set -e
+        python3 -m pip install --upgrade pip
+
+    - name: Install dependencies
+      run: |
+        set +e; source /opt/intel/oneapi/setvars.sh; set -e
+        python3 -m pip install -r tests/requirements.txt --prefer-binary
+
+    - name: Configure C++11
+      run: |
+        set +e; source /opt/intel/oneapi/setvars.sh; set -e
+        cmake -S . -B build-11     \
+        -DPYBIND11_WERROR=ON    \
+        -DDOWNLOAD_CATCH=ON     \
+        -DDOWNLOAD_EIGEN=OFF    \
+        -DCMAKE_CXX_STANDARD=11             \
+        -DCMAKE_CXX_COMPILER=$(which icpc)  \
+        -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
+
+    - name: Build C++11
+      run: |
+        set +e; source /opt/intel/oneapi/setvars.sh; set -e
+        cmake --build build-11 -j 2 -v
+
+    - name: Python tests C++11
+      run: |
+        set +e; source /opt/intel/oneapi/setvars.sh; set -e
+        sudo service apport stop
+        cmake --build build-11 --target check
+
+    - name: C++ tests C++11
+      run: |
+        set +e; source /opt/intel/oneapi/setvars.sh; set -e
+        cmake --build build-11 --target cpptest
+
+    - name: Interface test C++11
+      run: |
+        set +e; source /opt/intel/oneapi/setvars.sh; set -e
+        cmake --build build-11 --target test_cmake_build
+
+    - name: Configure C++17
+      run: |
+        set +e; source /opt/intel/oneapi/setvars.sh; set -e
+        cmake -S . -B build-17     \
+        -DPYBIND11_WERROR=ON    \
+        -DDOWNLOAD_CATCH=ON     \
+        -DDOWNLOAD_EIGEN=OFF    \
+        -DCMAKE_CXX_STANDARD=17             \
+        -DCMAKE_CXX_COMPILER=$(which icpc)  \
+        -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
+
+    - name: Build C++17
+      run: |
+        set +e; source /opt/intel/oneapi/setvars.sh; set -e
+        cmake --build build-17 -j 2 -v
+
+    - name: Python tests C++17
+      run: |
+        set +e; source /opt/intel/oneapi/setvars.sh; set -e
+        sudo service apport stop
+        cmake --build build-17 --target check
+
+    - name: C++ tests C++17
+      run: |
+        set +e; source /opt/intel/oneapi/setvars.sh; set -e
+        cmake --build build-17 --target cpptest
+
+    - name: Interface test C++17
+      run: |
+        set +e; source /opt/intel/oneapi/setvars.sh; set -e
+        cmake --build build-17 --target test_cmake_build
+
+
   # Testing on CentOS (manylinux uses a centos base, and this is an easy way
   # to get GCC 4.8, which is the manylinux1 compiler).
   centos:
@@ -553,7 +723,7 @@
         architecture: x86
 
     - name: Update CMake
-      uses: jwlawson/actions-setup-cmake@v1.4
+      uses: jwlawson/actions-setup-cmake@v1.7
 
     - name: Prepare MSVC
       uses: ilammy/msvc-dev-cmd@v1
@@ -599,7 +769,7 @@
         python-version: ${{ matrix.python }}
 
     - name: Update CMake
-      uses: jwlawson/actions-setup-cmake@v1.4
+      uses: jwlawson/actions-setup-cmake@v1.7
 
     - name: Prepare MSVC
       uses: ilammy/msvc-dev-cmd@v1
@@ -653,7 +823,7 @@
         python-version: ${{ matrix.python }}
 
     - name: Update CMake
-      uses: jwlawson/actions-setup-cmake@v1.4
+      uses: jwlawson/actions-setup-cmake@v1.7
 
     - name: Prepare env
       run: python -m pip install -r tests/requirements.txt --prefer-binary
diff --git a/.github/workflows/configure.yml b/.github/workflows/configure.yml
index 23f6022..578dba6 100644
--- a/.github/workflows/configure.yml
+++ b/.github/workflows/configure.yml
@@ -55,7 +55,7 @@
     # An action for adding a specific version of CMake:
     #   https://github.com/jwlawson/actions-setup-cmake
     - name: Setup CMake ${{ matrix.cmake }}
-      uses: jwlawson/actions-setup-cmake@v1.3
+      uses: jwlawson/actions-setup-cmake@v1.7
       with:
         cmake-version: ${{ matrix.cmake }}
 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 85254a8..6781ac4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,7 +15,7 @@
 repos:
 # Standard hooks
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v3.2.0
+  rev: v3.4.0
   hooks:
   - id: check-added-large-files
   - id: check-case-conflict
@@ -46,7 +46,7 @@
 
 # Flake8 also supports pre-commit natively (same author)
 - repo: https://gitlab.com/pycqa/flake8
-  rev: 3.8.3
+  rev: 3.8.4
   hooks:
   - id: flake8
     additional_dependencies: [flake8-bugbear, pep8-naming]
@@ -63,7 +63,7 @@
 
 # Check static types with mypy
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v0.790
+  rev: v0.800
   hooks:
   - id: mypy
     # The default Python type ignores .pyi files, so let's rerun if detected
@@ -74,7 +74,7 @@
 
 # Checks the manifest for missing files (native support)
 - repo: https://github.com/mgedmin/check-manifest
-  rev: "0.43"
+  rev: "0.46"
   hooks:
   - id: check-manifest
     # This is a slow hook, so only run this if --hook-stage manual is passed
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2c08ff0..ded4dad 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,8 +45,13 @@
   message(STATUS "pybind11 v${pybind11_VERSION} ${pybind11_VERSION_TYPE}")
 endif()
 
+# Avoid infinite recursion if tests include this as a subdirectory
+if(DEFINED PYBIND11_MASTER_PROJECT)
+  set(PYBIND11_TEST OFF)
+endif()
+
 # Check if pybind11 is being used directly or via add_subdirectory
-if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR)
+if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR AND NOT DEFINED PYBIND11_MASTER_PROJECT)
   ### Warn if not an out-of-source builds
   if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
     set(lines
@@ -73,6 +78,8 @@
     set(CMAKE_CXX_EXTENSIONS OFF)
     set(CMAKE_CXX_STANDARD_REQUIRED ON)
   endif()
+
+  set(pybind11_system "")
 else()
   set(PYBIND11_MASTER_PROJECT OFF)
   set(pybind11_system SYSTEM)
@@ -159,12 +166,24 @@
 # You can also place ifs *in* the Config.in, but not here.
 
 # This section builds targets, but does *not* touch Python
+# Non-IMPORT targets cannot be defined twice
+if(NOT TARGET pybind11_headers)
+  # Build the headers-only target (no Python included):
+  # (long name used here to keep this from clashing in subdirectory mode)
+  add_library(pybind11_headers INTERFACE)
+  add_library(pybind11::pybind11_headers ALIAS pybind11_headers) # to match exported target
+  add_library(pybind11::headers ALIAS pybind11_headers) # easier to use/remember
 
-# Build the headers-only target (no Python included):
-# (long name used here to keep this from clashing in subdirectory mode)
-add_library(pybind11_headers INTERFACE)
-add_library(pybind11::pybind11_headers ALIAS pybind11_headers) # to match exported target
-add_library(pybind11::headers ALIAS pybind11_headers) # easier to use/remember
+  target_include_directories(
+    pybind11_headers ${pybind11_system} INTERFACE $<BUILD_INTERFACE:${pybind11_INCLUDE_DIR}>
+                                                  $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+
+  target_compile_features(pybind11_headers INTERFACE cxx_inheriting_constructors cxx_user_literals
+                                                     cxx_right_angle_brackets)
+else()
+  # It is invalid to install a target twice, too.
+  set(PYBIND11_INSTALL OFF)
+endif()
 
 include("${CMAKE_CURRENT_SOURCE_DIR}/tools/pybind11Common.cmake")
 
@@ -175,14 +194,6 @@
   file(RELATIVE_PATH CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX} ${PYTHON_INCLUDE_DIRS})
 endif()
 
-# Fill in headers target
-target_include_directories(
-  pybind11_headers ${pybind11_system} INTERFACE $<BUILD_INTERFACE:${pybind11_INCLUDE_DIR}>
-                                                $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
-
-target_compile_features(pybind11_headers INTERFACE cxx_inheriting_constructors cxx_user_literals
-                                                   cxx_right_angle_brackets)
-
 if(PYBIND11_INSTALL)
   install(DIRECTORY ${pybind11_INCLUDE_DIR}/pybind11 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
   set(PYBIND11_CMAKECONFIG_INSTALL_DIR
diff --git a/METADATA b/METADATA
index 05c3ffb..519f644 100644
--- a/METADATA
+++ b/METADATA
@@ -9,11 +9,11 @@
     type: GIT
     value: "https://github.com/pybind/pybind11.git"
   }
-  version: "v2.6.1"
+  version: "v2.6.2"
   license_type: NOTICE
   last_upgrade_date {
     year: 2021
     month: 1
-    day: 13
+    day: 26
   }
 }
diff --git a/README.rst b/README.rst
index 1474cb9..6c2a255 100644
--- a/README.rst
+++ b/README.rst
@@ -5,18 +5,27 @@
 
 |Latest Documentation Status| |Stable Documentation Status| |Gitter chat| |CI| |Build status|
 
+|Repology| |PyPI package| |Conda-forge| |Python Versions|
+
+`Setuptools example <https://github.com/pybind/python_example>`_
+• `Scikit-build example <https://github.com/pybind/scikit_build_example>`_
+• `CMake example <https://github.com/pybind/cmake_example>`_
+
+.. start
+
 .. warning::
 
-   Combining older versions of pybind11 (< 2.6.0) with the brand-new Python
-   3.9.0 will trigger undefined behavior that typically manifests as crashes
-   during interpreter shutdown (but could also destroy your data. **You have been
+   Combining older versions of pybind11 (< 2.6.0) with Python 3.9.0 will
+   trigger undefined behavior that typically manifests as crashes during
+   interpreter shutdown (but could also destroy your data). **You have been
+   warned.**
 
-   We recommend that you wait for Python 3.9.1 slated for release in December,
-   which will include a `fix <https://github.com/python/cpython/pull/22670>`_
-   that resolves this problem.  In the meantime, please update to the latest
-   version of pybind11 (2.6.0 or newer), which includes a temporary workaround
-   specifically when Python 3.9.0 is detected at runtime.
+   We recommend that you update to the latest patch release of Python (3.9.1),
+   which includes a `fix <https://github.com/python/cpython/pull/22670>`_
+   that resolves this problem. If you do use Python 3.9.0, please update to
+   the latest version of pybind11 (2.6.0 or newer), which includes a temporary
+   workaround specifically when Python 3.9.0 is detected at runtime.
+
 
 **pybind11** is a lightweight header-only library that exposes C++ types
 in Python and vice versa, mainly to create Python bindings of existing
@@ -127,11 +136,10 @@
    newer)
 2. GCC 4.8 or newer
 3. Microsoft Visual Studio 2015 Update 3 or newer
-4. Intel C++ compiler 18 or newer
-   (`possible issue <https://github.com/pybind/pybind11/pull/2573>`_ on 20.2)
-5. Cygwin/GCC (tested on 2.5.1)
-6. NVCC (CUDA 11.0 tested)
-7. NVIDIA PGI (20.7 and 20.9 tested)
+4. Intel classic C++ compiler 18 or newer (ICC 20.2 tested in CI)
+5. Cygwin/GCC (previously tested on 2.5.1)
+6. NVCC (CUDA 11.0 tested in CI)
+7. NVIDIA PGI (20.9 tested in CI)
 
 About
 -----
@@ -165,7 +173,7 @@
 
 .. |Latest Documentation Status| image:: https://readthedocs.org/projects/pybind11/badge?version=latest
    :target: http://pybind11.readthedocs.org/en/latest
-.. |Stable Documentation Status| image:: https://img.shields.io/badge/docs-stable-blue
+.. |Stable Documentation Status| image:: https://img.shields.io/badge/docs-stable-blue.svg
    :target: http://pybind11.readthedocs.org/en/stable
 .. |Gitter chat| image:: https://img.shields.io/gitter/room/gitterHQ/gitter.svg
    :target: https://gitter.im/pybind/Lobby
@@ -173,3 +181,11 @@
    :target: https://github.com/pybind/pybind11/actions
 .. |Build status| image:: https://ci.appveyor.com/api/projects/status/riaj54pn4h08xy40?svg=true
    :target: https://ci.appveyor.com/project/wjakob/pybind11
+.. |PyPI package| image:: https://img.shields.io/pypi/v/pybind11.svg
+   :target: https://pypi.org/project/pybind11/
+.. |Conda-forge| image:: https://img.shields.io/conda/vn/conda-forge/pybind11.svg
+   :target: https://github.com/conda-forge/pybind11-feedstock
+.. |Repology| image:: https://repology.org/badge/latest-versions/python:pybind11.svg
+   :target: https://repology.org/project/python:pybind11/versions
+.. |Python Versions| image:: https://img.shields.io/pypi/pyversions/pybind11.svg
+   :target: https://pypi.org/project/pybind11/
diff --git a/docs/advanced/cast/stl.rst b/docs/advanced/cast/stl.rst
index 70fde0d..b8622ee 100644
--- a/docs/advanced/cast/stl.rst
+++ b/docs/advanced/cast/stl.rst
@@ -72,6 +72,17 @@
 a ``name::visit()`` function. For any other function name, the specialization must be
 included to tell pybind11 how to visit the variant.
 
+.. warning::
+
+    When converting a ``variant`` type, pybind11 follows the same rules as when
+    determining which function overload to call (:ref:`overload_resolution`), and
+    so the same caveats hold. In particular, the order in which the ``variant``'s
+    alternatives are listed is important, since pybind11 will try conversions in
+    this order. This means that, for example, when converting ``variant<int, bool>``,
+    the ``bool`` alternative will never be selected, as any Python ``bool`` is already
+    an ``int`` and is convertible to a C++ ``int``. Changing the order of alternatives
+    (and using ``variant<bool, int>``, in this example) provides a solution.
+
 .. note::
 
     pybind11 only supports the modern implementation of ``boost::variant``
diff --git a/docs/advanced/exceptions.rst b/docs/advanced/exceptions.rst
index 5eae556..7a4d6cb 100644
--- a/docs/advanced/exceptions.rst
+++ b/docs/advanced/exceptions.rst
@@ -43,15 +43,23 @@
 |                                      | of bounds access in ``__getitem__``, |
 |                                      | ``__setitem__``, etc.)               |
 +--------------------------------------+--------------------------------------+
-| :class:`pybind11::value_error`       | ``ValueError`` (used to indicate     |
-|                                      | wrong value passed in                |
-|                                      | ``container.remove(...)``)           |
-+--------------------------------------+--------------------------------------+
 | :class:`pybind11::key_error`         | ``KeyError`` (used to indicate out   |
 |                                      | of bounds access in ``__getitem__``, |
 |                                      | ``__setitem__`` in dict-like         |
 |                                      | objects, etc.)                       |
 +--------------------------------------+--------------------------------------+
+| :class:`pybind11::value_error`       | ``ValueError`` (used to indicate     |
+|                                      | wrong value passed in                |
+|                                      | ``container.remove(...)``)           |
++--------------------------------------+--------------------------------------+
+| :class:`pybind11::type_error`        | ``TypeError``                        |
++--------------------------------------+--------------------------------------+
+| :class:`pybind11::buffer_error`      | ``BufferError``                      |
++--------------------------------------+--------------------------------------+
+| :class:`pybind11::import_error`      | ``ImportError``                      |
++--------------------------------------+--------------------------------------+
+| Any other exception                  | ``RuntimeError``                     |
++--------------------------------------+--------------------------------------+
 
 Exception translation is not bidirectional. That is, *catching* the C++
 exceptions defined above above will not trap exceptions that originate from
@@ -188,7 +196,7 @@
     } catch (py::error_already_set &e) {
         if (e.matches(PyExc_FileNotFoundError)) {
             py::print("missing.txt not found");
-        } else if (e.match(PyExc_PermissionError)) {
+        } else if (e.matches(PyExc_PermissionError)) {
             py::print("missing.txt found but not accessible");
         } else {
             throw;
diff --git a/docs/advanced/functions.rst b/docs/advanced/functions.rst
index ebdff9c..55a40a5 100644
--- a/docs/advanced/functions.rst
+++ b/docs/advanced/functions.rst
@@ -524,6 +524,8 @@
     not allow ``None`` as argument.  To pass optional argument of these copied types consider
     using ``std::optional<T>``
 
+.. _overload_resolution:
+
 Overload resolution order
 =========================
 
diff --git a/docs/advanced/pycpp/numpy.rst b/docs/advanced/pycpp/numpy.rst
index 19ed10b..0a81aa8 100644
--- a/docs/advanced/pycpp/numpy.rst
+++ b/docs/advanced/pycpp/numpy.rst
@@ -150,8 +150,10 @@
 
 When it is invoked with a different type (e.g. an integer or a list of
 integers), the binding code will attempt to cast the input into a NumPy array
-of the requested type. Note that this feature requires the
-:file:`pybind11/numpy.h` header to be included.
+of the requested type. This feature requires the :file:`pybind11/numpy.h`
+header to be included. Note that :file:`pybind11/numpy.h` does not depend on
+the NumPy headers, and thus can be used without declaring a build-time
+dependency on NumPy; NumPy>=1.7.0 is a runtime dependency.
 
 Data in NumPy arrays is not guaranteed to packed in a dense manner;
 furthermore, entries can be separated by arbitrary column and row strides.
diff --git a/docs/basics.rst b/docs/basics.rst
index b9d386c..0b1d85c 100644
--- a/docs/basics.rst
+++ b/docs/basics.rst
@@ -136,7 +136,14 @@
 
 .. code-block:: bash
 
-    $ c++ -O3 -Wall -shared -std=c++11 -fPIC `python3 -m pybind11 --includes` example.cpp -o example`python3-config --extension-suffix`
+    $ c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) example.cpp -o example$(python3-config --extension-suffix)
+
+.. note::
+
+    If you used :ref:`include_as_a_submodule` to get the pybind11 source, then
+    use ``$(python3-config --includes) -Iextern/pybind11/include`` instead of
+    ``$(python3 -m pybind11 --includes)`` in the above compilation, as
+    explained in :ref:`building_manually`.
 
 For more details on the required compiler flags on Linux and macOS, see
 :ref:`building_manually`. For complete cross-platform compilation instructions,
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 561baa5..157b296 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -6,6 +6,127 @@
 Starting with version 1.8.0, pybind11 releases use a `semantic versioning
 <http://semver.org>`_ policy.
 
+
+v2.6.2 (Jan 26, 2021)
+---------------------
+
+Minor missing functionality added:
+
+* enum: add missing Enum.value property.
+  `#2739 <https://github.com/pybind/pybind11/pull/2739>`_
+
+* Allow thread termination to be avoided during shutdown for CPython 3.7+ via
+  ``.disarm`` for ``gil_scoped_acquire``/``gil_scoped_release``.
+  `#2657 <https://github.com/pybind/pybind11/pull/2657>`_
+
+Fixed or improved behavior in a few special cases:
+
+* Fix bug where the constructor of ``object`` subclasses would not throw on
+  being passed a Python object of the wrong type.
+  `#2701 <https://github.com/pybind/pybind11/pull/2701>`_
+
+* The ``type_caster`` for integers does not convert Python objects with
+  ``__int__`` anymore with ``noconvert`` or during the first round of trying
+  overloads.
+  `#2698 <https://github.com/pybind/pybind11/pull/2698>`_
+
+* When casting to a C++ integer, ``__index__`` is always called and not
+  considered as conversion, consistent with Python 3.8+.
+  `#2801 <https://github.com/pybind/pybind11/pull/2801>`_
+
+Build improvements:
+
+* Setup helpers: ``extra_compile_args`` and ``extra_link_args`` automatically set by
+  Pybind11Extension are now prepended, which allows them to be overridden
+  by user-set ``extra_compile_args`` and ``extra_link_args``.
+  `#2808 <https://github.com/pybind/pybind11/pull/2808>`_
+
+* Setup helpers: Don't trigger unused parameter warning.
+  `#2735 <https://github.com/pybind/pybind11/pull/2735>`_
+
+* CMake: Support running with ``--warn-uninitialized`` active.
+  `#2806 <https://github.com/pybind/pybind11/pull/2806>`_
+
+* CMake: Avoid error if included from two submodule directories.
+  `#2804 <https://github.com/pybind/pybind11/pull/2804>`_
+
+* CMake: Fix ``STATIC`` / ``SHARED`` being ignored in FindPython mode.
+  `#2796 <https://github.com/pybind/pybind11/pull/2796>`_
+
+* CMake: Respect the setting for ``CMAKE_CXX_VISIBILITY_PRESET`` if defined.
+  `#2793 <https://github.com/pybind/pybind11/pull/2793>`_
+
+* CMake: Fix issue with FindPython2/FindPython3 not working with ``pybind11::embed``.
+  `#2662 <https://github.com/pybind/pybind11/pull/2662>`_
+
+* CMake: mixing local and installed pybind11's would prioritize the installed
+  one over the local one (regression in 2.6.0).
+  `#2716 <https://github.com/pybind/pybind11/pull/2716>`_
+
+
+Bug fixes:
+
+* Fixed segfault in multithreaded environments when using
+  ``scoped_ostream_redirect``.
+  `#2675 <https://github.com/pybind/pybind11/pull/2675>`_
+
+* Leave docstring unset when all docstring-related options are disabled, rather
+  than set an empty string.
+  `#2745 <https://github.com/pybind/pybind11/pull/2745>`_
+
+* The module key in builtins that pybind11 uses to store its internals changed
+  from std::string to a python str type (more natural on Python 2, no change on
+  Python 3).
+  `#2814 <https://github.com/pybind/pybind11/pull/2814>`_
+
+* Fixed assertion error related to unhandled (later overwritten) exception in
+  CPython 3.8 and 3.9 debug builds.
+  `#2685 <https://github.com/pybind/pybind11/pull/2685>`_
+
+* Fix ``py::gil_scoped_acquire`` assert with CPython 3.9 debug build.
+  `#2683 <https://github.com/pybind/pybind11/pull/2683>`_
+
+* Fix issue with a test failing on PyTest 6.2.
+  `#2741 <https://github.com/pybind/pybind11/pull/2741>`_
+
+Warning fixes:
+
+* Fix warning modifying constructor parameter 'flag' that shadows a field of
+  'set_flag' ``[-Wshadow-field-in-constructor-modified]``.
+  `#2780 <https://github.com/pybind/pybind11/pull/2780>`_
+
+* Suppressed some deprecation warnings about old-style
+  ``__init__``/``__setstate__`` in the tests.
+  `#2759 <https://github.com/pybind/pybind11/pull/2759>`_
+
+Valgrind work:
+
+* Fix invalid access when calling a pybind11 ``__init__`` on a non-pybind11
+  class instance.
+  `#2755 <https://github.com/pybind/pybind11/pull/2755>`_
+
+* Fixed various minor memory leaks in pybind11's test suite.
+  `#2758 <https://github.com/pybind/pybind11/pull/2758>`_
+
+* Resolved memory leak in cpp_function initialization when exceptions occurred.
+  `#2756 <https://github.com/pybind/pybind11/pull/2756>`_
+
+* Added a Valgrind build, checking for leaks and memory-related UB, to CI.
+  `#2746 <https://github.com/pybind/pybind11/pull/2746>`_
+
+Compiler support:
+
+* Intel compiler was not activating C++14 support due to a broken define.
+  `#2679 <https://github.com/pybind/pybind11/pull/2679>`_
+
+* Support ICC and NVIDIA HPC SDK in C++17 mode.
+  `#2729 <https://github.com/pybind/pybind11/pull/2729>`_
+
+* Support Intel OneAPI compiler (ICC 20.2) and add to CI.
+  `#2573 <https://github.com/pybind/pybind11/pull/2573>`_
+
+
+
 v2.6.1 (Nov 11, 2020)
 ---------------------
 
@@ -14,7 +135,7 @@
   and ``eval`` in pure Python.
   `#2616 <https://github.com/pybind/pybind11/pull/2616>`_
 
-* ``setup_helpers`` will no longer set a minimum macOS version lower than the
+* ``setup_helpers`` will no longer set a minimum macOS version higher than the
   current version.
   `#2622 <https://github.com/pybind/pybind11/pull/2622>`_
 
diff --git a/docs/compiling.rst b/docs/compiling.rst
index f26e6cf..3a8a270 100644
--- a/docs/compiling.rst
+++ b/docs/compiling.rst
@@ -84,10 +84,11 @@
 
 The argument is the name of an environment variable to control the number of
 threads, such as ``NPY_NUM_BUILD_JOBS`` (as used by NumPy), though you can set
-something different if you want. You can also pass ``default=N`` to set the
-default number of threads (0 will take the number of threads available) and
-``max=N``, the maximum number of threads; if you have a large extension you may
-want set this to a memory dependent number.
+something different if you want; ``CMAKE_BUILD_PARALLEL_LEVEL`` is another choice
+a user might expect. You can also pass ``default=N`` to set the default number
+of threads (0 will take the number of threads available) and ``max=N``, the
+maximum number of threads; if you have a large extension you may want to set this
+to a memory-dependent number.
 
 If you are developing rapidly and have a lot of C++ files, you may want to
 avoid rebuilding files that have not changed. For simple cases were you are
@@ -136,7 +137,7 @@
 .. code-block:: toml
 
     [build-system]
-    requires = ["setuptools", "wheel", "pybind11==2.6.0"]
+    requires = ["setuptools>=42", "wheel", "pybind11~=2.6.1"]
     build-backend = "setuptools.build_meta"
 
 .. note::
@@ -147,10 +148,12 @@
     in Python) using something like `cibuildwheel`_, remember that ``setup.py``
     and ``pyproject.toml`` are not even contained in the wheel, so this high
     Pip requirement is only for source builds, and will not affect users of
-    your binary wheels.
+    your binary wheels. If you are building SDists and wheels, then
+    `pypa-build`_ is the recommended official tool.
 
 .. _PEP 517: https://www.python.org/dev/peps/pep-0517/
 .. _cibuildwheel: https://cibuildwheel.readthedocs.io
+.. _pypa-build: https://pypa-build.readthedocs.io/en/latest/
 
 .. _setup_helpers-setup_requires:
 
@@ -401,13 +404,14 @@
 FindPython mode
 ---------------
 
-CMake 3.12+ (3.15+ recommended) added a new module called FindPython that had a
-highly improved search algorithm and modern targets and tools. If you use
-FindPython, pybind11 will detect this and use the existing targets instead:
+CMake 3.12+ (3.15+ recommended, 3.18.2+ ideal) added a new module called
+FindPython that had a highly improved search algorithm and modern targets
+and tools. If you use FindPython, pybind11 will detect this and use the
+existing targets instead:
 
 .. code-block:: cmake
 
-    cmake_minumum_required(VERSION 3.15...3.18)
+    cmake_minimum_required(VERSION 3.15...3.19)
     project(example LANGUAGES CXX)
 
     find_package(Python COMPONENTS Interpreter Development REQUIRED)
@@ -433,6 +437,14 @@
 virtualenv/venv support, and Conda support, this tends to find the correct
 Python version more often than the old system did).
 
+.. warning::
+
+    When the Python libraries (i.e. ``libpythonXX.a`` and ``libpythonXX.so``
+    on Unix) are not available, as is the case on a manylinux image, the
+    ``Development`` component will not be resolved by ``FindPython``. When not
+    using the embedding functionality, CMake 3.18+ allows you to specify
+    ``Development.Module`` instead of ``Development`` to resolve this issue.
+
 .. versionadded:: 2.6
 
 Advanced: interface library targets
@@ -557,7 +569,7 @@
 
 .. code-block:: bash
 
-    $ c++ -O3 -Wall -shared -std=c++11 -fPIC `python3 -m pybind11 --includes` example.cpp -o example`python3-config --extension-suffix`
+    $ c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) example.cpp -o example$(python3-config --extension-suffix)
 
 The flags given here assume that you're using Python 3. For Python 2, just
 change the executable appropriately (to ``python`` or ``python2``).
@@ -569,7 +581,7 @@
 ``python3-config --includes``.
 
 Note that Python 2.7 modules don't use a special suffix, so you should simply
-use ``example.so`` instead of ``example`python3-config --extension-suffix```.
+use ``example.so`` instead of ``example$(python3-config --extension-suffix)``.
 Besides, the ``--extension-suffix`` option may or may not be available, depending
 on the distribution; in the latter case, the module extension can be manually
 set to ``.so``.
@@ -580,7 +592,7 @@
 
 .. code-block:: bash
 
-    $ c++ -O3 -Wall -shared -std=c++11 -undefined dynamic_lookup `python3 -m pybind11 --includes` example.cpp -o example`python3-config --extension-suffix`
+    $ c++ -O3 -Wall -shared -std=c++11 -undefined dynamic_lookup $(python3 -m pybind11 --includes) example.cpp -o example$(python3-config --extension-suffix)
 
 In general, it is advisable to include several additional build parameters
 that can considerably reduce the size of the created binary. Refer to section
diff --git a/docs/conf.py b/docs/conf.py
index 66db310..6ac054c 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -239,6 +239,8 @@
 
 # -- Options for LaTeX output ---------------------------------------------
 
+latex_engine = "pdflatex"
+
 latex_elements = {
     # The paper size ('letterpaper' or 'a4paper').
     # 'papersize': 'letterpaper',
@@ -353,8 +355,11 @@
     with open(DIR.parent / "README.rst") as f:
         contents = f.read()
 
-    # Filter out section titles for index.rst for LaTeX
     if app.builder.name == "latex":
+        # Remove badges and stuff from start
+        contents = contents[contents.find(r".. start") :]
+
+        # Filter out section titles for index.rst for LaTeX
         contents = re.sub(r"^(.*)\n[-~]{3,}$", r"**\1**", contents, flags=re.MULTILINE)
 
     with open(DIR / "readme.rst", "w") as f:
diff --git a/docs/installing.rst b/docs/installing.rst
index 2597285..30b9f18 100644
--- a/docs/installing.rst
+++ b/docs/installing.rst
@@ -8,6 +8,8 @@
 developers recommend one of the first three ways listed here, submodule, PyPI,
 or conda-forge, for obtaining pybind11.
 
+.. _include_as_a_submodule:
+
 Include as a submodule
 ======================
 
@@ -16,7 +18,7 @@
 
 .. code-block:: bash
 
-    git submodule add ../../pybind/pybind11 extern/pybind11 -b stable
+    git submodule add -b stable ../../pybind/pybind11 extern/pybind11
     git submodule update --init
 
 This assumes you are placing your dependencies in ``extern/``, and that you are
diff --git a/docs/release.rst b/docs/release.rst
index 43f502a..9dbff03 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -25,12 +25,17 @@
   - Update ``PYBIND11_VERSION_MAJOR`` etc. in
     ``include/pybind11/detail/common.h``. PATCH should be a simple integer.
   - Update ``pybind11/_version.py`` (match above)
-  - Ensure that all the information in ``setup.py`` is up-to-date.
+  - Ensure that all the information in ``setup.cfg`` is up-to-date, like
+    supported Python versions.
   - Add release date in ``docs/changelog.rst``.
+      - Check to make sure
+        `needs-changelog <https://github.com/pybind/pybind11/pulls?q=is%3Apr+is%3Aclosed+label%3A%22needs+changelog%22>`_
+        issues are entered in the changelog (clear the label when done).
   - ``git add`` and ``git commit``, ``git push``. **Ensure CI passes**. (If it
     fails due to a known flake issue, either ignore or restart CI.)
-- Add a release branch if this is a new minor version
-  - ``git checkout -b vX.Y``, ``git push -u origin vX.Y``
+- Add a release branch if this is a new minor version, or update the existing release branch if it is a patch version
+  - New branch: ``git checkout -b vX.Y``, ``git push -u origin vX.Y``
+  - Update branch: ``git checkout vX.Y``, ``git merge <release branch>``, ``git push``
 - Update tags (optional; if you skip this, the GitHub release makes a
   non-annotated tag for you)
   - ``git tag -a vX.Y.Z -m 'vX.Y.Z release'``.
@@ -47,7 +52,8 @@
     name (if you didn't tag above, it will be made here), fill in a release
     name like "Version X.Y.Z", and optionally copy-and-paste the changelog into
     the description (processed as markdown by Pandoc). Check "pre-release" if
-    this is a beta/RC.
+    this is a beta/RC. You can get partway there with
+    ``cat docs/changelog.rst | pandoc -f rst -t markdown``.
   - CLI method: with ``gh`` installed, run ``gh release create vX.Y.Z -t "Version X.Y.Z"``
     If this is a pre-release, add ``-p``.
 
@@ -56,11 +62,20 @@
   - Update version macros in ``include/pybind11/detail/common.h`` (set PATCH to
     ``0.dev1`` and increment MINOR).
   - Update ``_version.py`` to match
-  - Add a plot for in-development updates in ``docs/changelog.rst``.
+  - Add a spot for in-development updates in ``docs/changelog.rst``.
   - ``git add``, ``git commit``, ``git push``
 
 If a version branch is updated, remember to set PATCH to ``1.dev1``.
 
+If you'd like to bump homebrew, run:
+
+.. code-block::
+
+    brew bump-formula-pr --url https://github.com/pybind/pybind11/archive/vX.Y.Z.tar.gz
+
+Conda-forge should automatically make a PR in a few hours, and automatically
+merge it if there are no issues.
+
 
 Manual packaging
 ^^^^^^^^^^^^^^^^
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 35366e3..9e6a3f6 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,7 +1,7 @@
-breathe==4.20.0
+breathe==4.25.1
 commonmark==0.9.1
-recommonmark==0.6.0
-sphinx==3.2.1
+recommonmark==0.7.1
+sphinx==3.3.1
 sphinx_rtd_theme==0.5.0
-sphinxcontrib-moderncmakedomain==3.13
+sphinxcontrib-moderncmakedomain==3.17
 sphinxcontrib-svg2pdfconverter==1.1.0
diff --git a/docs/upgrade.rst b/docs/upgrade.rst
index 87bcebe..f13fbcf 100644
--- a/docs/upgrade.rst
+++ b/docs/upgrade.rst
@@ -192,7 +192,7 @@
         ...
         .def(py::pickle(
             [](const Foo &self) { // __getstate__
-                return py::make_tuple(f.value1(), f.value2(), ...); // unchanged
+                return py::make_tuple(self.value1(), self.value2(), ...); // unchanged
             },
             [](py::tuple t) { // __setstate__, note: no `self` argument
                 return new Foo(t[0].cast<std::string>(), ...);
diff --git a/include/pybind11/attr.h b/include/pybind11/attr.h
index 0c41670..50efdc7 100644
--- a/include/pybind11/attr.h
+++ b/include/pybind11/attr.h
@@ -544,7 +544,7 @@
           size_t named = constexpr_sum(std::is_base_of<arg, Extra>::value...),
           size_t self  = constexpr_sum(std::is_same<is_method, Extra>::value...)>
 constexpr bool expected_num_args(size_t nargs, bool has_args, bool has_kwargs) {
-    return named == 0 || (self + named + has_args + has_kwargs) == nargs;
+    return named == 0 || (self + named + size_t(has_args) + size_t(has_kwargs)) == nargs;
 }
 
 PYBIND11_NAMESPACE_END(detail)
diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
index 11c61a4..0caccdb 100644
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -960,9 +960,14 @@
 private:
     using caster_t = make_caster<type>;
     caster_t subcaster;
-    using subcaster_cast_op_type = typename caster_t::template cast_op_type<type>;
-    static_assert(std::is_same<typename std::remove_const<type>::type &, subcaster_cast_op_type>::value,
-            "std::reference_wrapper<T> caster requires T to have a caster with an `T &` operator");
+    using reference_t = type&;
+    using subcaster_cast_op_type =
+        typename caster_t::template cast_op_type<reference_t>;
+
+    static_assert(std::is_same<typename std::remove_const<type>::type &, subcaster_cast_op_type>::value ||
+                  std::is_same<reference_t, subcaster_cast_op_type>::value,
+                  "std::reference_wrapper<T> caster requires T to have a caster with an "
+                  "`operator T &()` or `operator const T &()`");
 public:
     bool load(handle src, bool convert) { return subcaster.load(src, convert); }
     static constexpr auto name = caster_t::name;
@@ -973,7 +978,7 @@
         return caster_t::cast(&src.get(), policy, parent);
     }
     template <typename T> using cast_op_type = std::reference_wrapper<type>;
-    operator std::reference_wrapper<type>() { return subcaster.operator subcaster_cast_op_type&(); }
+    operator std::reference_wrapper<type>() { return cast_op<type &>(subcaster); }
 };
 
 #define PYBIND11_TYPE_CASTER(type, py_name) \
@@ -1020,6 +1025,14 @@
         if (!src)
             return false;
 
+#if !defined(PYPY_VERSION)
+        auto index_check = [](PyObject *o) { return PyIndex_Check(o); };
+#else
+        // In PyPy 7.3.3, `PyIndex_Check` is implemented by calling `__index__`,
+        // while CPython only considers the existence of `nb_index`/`__index__`.
+        auto index_check = [](PyObject *o) { return hasattr(o, "__index__"); };
+#endif
+
         if (std::is_floating_point<T>::value) {
             if (convert || PyFloat_Check(src.ptr()))
                 py_value = (py_type) PyFloat_AsDouble(src.ptr());
@@ -1027,12 +1040,31 @@
                 return false;
         } else if (PyFloat_Check(src.ptr())) {
             return false;
-        } else if (std::is_unsigned<py_type>::value) {
-            py_value = as_unsigned<py_type>(src.ptr());
-        } else { // signed integer:
-            py_value = sizeof(T) <= sizeof(long)
-                ? (py_type) PyLong_AsLong(src.ptr())
-                : (py_type) PYBIND11_LONG_AS_LONGLONG(src.ptr());
+        } else if (!convert && !PYBIND11_LONG_CHECK(src.ptr()) && !index_check(src.ptr())) {
+            return false;
+        } else {
+            handle src_or_index = src;
+#if PY_VERSION_HEX < 0x03080000
+            object index;
+            if (!PYBIND11_LONG_CHECK(src.ptr())) {  // So: index_check(src.ptr())
+                index = reinterpret_steal<object>(PyNumber_Index(src.ptr()));
+                if (!index) {
+                    PyErr_Clear();
+                    if (!convert)
+                        return false;
+                }
+                else {
+                    src_or_index = index;
+                }
+            }
+#endif
+            if (std::is_unsigned<py_type>::value) {
+                py_value = as_unsigned<py_type>(src_or_index.ptr());
+            } else { // signed integer:
+                py_value = sizeof(T) <= sizeof(long)
+                    ? (py_type) PyLong_AsLong(src_or_index.ptr())
+                    : (py_type) PYBIND11_LONG_AS_LONGLONG(src_or_index.ptr());
+            }
         }
 
         // Python API reported an error
@@ -1041,15 +1073,8 @@
         // Check to see if the conversion is valid (integers should match exactly)
         // Signed/unsigned checks happen elsewhere
         if (py_err || (std::is_integral<T>::value && sizeof(py_type) != sizeof(T) && py_value != (py_type) (T) py_value)) {
-            bool type_error = py_err && PyErr_ExceptionMatches(
-#if PY_VERSION_HEX < 0x03000000 && !defined(PYPY_VERSION)
-                PyExc_SystemError
-#else
-                PyExc_TypeError
-#endif
-            );
             PyErr_Clear();
-            if (type_error && convert && PyNumber_Check(src.ptr())) {
+            if (py_err && convert && PyNumber_Check(src.ptr())) {
                 auto tmp = reinterpret_steal<object>(std::is_floating_point<T>::value
                                                      ? PyNumber_Float(src.ptr())
                                                      : PyNumber_Long(src.ptr()));
@@ -1870,7 +1895,14 @@
 #if !defined(NDEBUG)
         , type(type_id<T>())
 #endif
-    { }
+    {
+        // Workaround! See:
+        // https://github.com/pybind/pybind11/issues/2336
+        // https://github.com/pybind/pybind11/pull/2685#issuecomment-731286700
+        if (PyErr_Occurred()) {
+            PyErr_Clear();
+        }
+    }
 
 public:
     /// Direct construction with name, default, and description
@@ -2160,16 +2192,26 @@
     dict m_kwargs;
 };
 
+// [workaround(intel)] Separate function required here
+// We need to put this into a separate function because the Intel compiler
+// fails to compile enable_if_t<!all_of<is_positional<Args>...>::value>
+// (tested with ICC 2021.1 Beta 20200827).
+template <typename... Args>
+constexpr bool args_are_all_positional()
+{
+  return all_of<is_positional<Args>...>::value;
+}
+
 /// Collect only positional arguments for a Python function call
 template <return_value_policy policy, typename... Args,
-          typename = enable_if_t<all_of<is_positional<Args>...>::value>>
+          typename = enable_if_t<args_are_all_positional<Args...>()>>
 simple_collector<policy> collect_arguments(Args &&...args) {
     return simple_collector<policy>(std::forward<Args>(args)...);
 }
 
 /// Collect all arguments, including keywords and unpacking (only instantiated when needed)
 template <return_value_policy policy, typename... Args,
-          typename = enable_if_t<!all_of<is_positional<Args>...>::value>>
+          typename = enable_if_t<!args_are_all_positional<Args...>()>>
 unpacking_collector<policy> collect_arguments(Args &&...args) {
     // Following argument order rules for generalized unpacking according to PEP 448
     static_assert(
diff --git a/include/pybind11/detail/class.h b/include/pybind11/detail/class.h
index 65dad5a..2f414e5 100644
--- a/include/pybind11/detail/class.h
+++ b/include/pybind11/detail/class.h
@@ -340,8 +340,6 @@
     // Allocate the value/holder internals:
     inst->allocate_layout();
 
-    inst->owned = true;
-
     return self;
 }
 
@@ -552,6 +550,12 @@
     }
     std::memset(view, 0, sizeof(Py_buffer));
     buffer_info *info = tinfo->get_buffer(obj, tinfo->get_buffer_data);
+    if ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE && info->readonly) {
+        delete info;
+        // view->obj = nullptr;  // Was just memset to 0, so not necessary
+        PyErr_SetString(PyExc_BufferError, "Writable buffer requested for readonly storage");
+        return -1;
+    }
     view->obj = obj;
     view->ndim = 1;
     view->internal = info;
@@ -561,12 +565,6 @@
     for (auto s : info->shape)
         view->len *= s;
     view->readonly = info->readonly;
-    if ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE && info->readonly) {
-        if (view)
-            view->obj = nullptr;
-        PyErr_SetString(PyExc_BufferError, "Writable buffer requested for readonly storage");
-        return -1;
-    }
     if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
         view->format = const_cast<char *>(info->format.c_str());
     if ((flags & PyBUF_STRIDES) == PyBUF_STRIDES) {
diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h
index 751141a..de495e4 100644
--- a/include/pybind11/detail/common.h
+++ b/include/pybind11/detail/common.h
@@ -11,7 +11,7 @@
 
 #define PYBIND11_VERSION_MAJOR 2
 #define PYBIND11_VERSION_MINOR 6
-#define PYBIND11_VERSION_PATCH 1
+#define PYBIND11_VERSION_PATCH 2
 
 #define PYBIND11_NAMESPACE_BEGIN(name) namespace name {
 #define PYBIND11_NAMESPACE_END(name) }
@@ -27,7 +27,7 @@
 #  endif
 #endif
 
-#if !(defined(_MSC_VER) && __cplusplus == 199711L) && !defined(__INTEL_COMPILER)
+#if !(defined(_MSC_VER) && __cplusplus == 199711L)
 #  if __cplusplus >= 201402L
 #    define PYBIND11_CPP14
 #    if __cplusplus >= 201703L
@@ -49,6 +49,8 @@
 #if defined(__INTEL_COMPILER)
 #  if __INTEL_COMPILER < 1800
 #    error pybind11 requires Intel C++ compiler v18 or newer
+#  elif __INTEL_COMPILER < 1900 && defined(PYBIND11_CPP14)
+#    error pybind11 supports only C++11 with Intel C++ compiler v18. Use v19 or newer for C++14.
 #  endif
 #elif defined(__clang__) && !defined(__apple_build_version__)
 #  if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 3)
@@ -663,6 +665,10 @@
     std::is_pointer<T>::value && std::is_function<typename std::remove_pointer<T>::type>::value>;
 
 template <typename F> struct strip_function_object {
+    // If you are encountering an
+    // 'error: name followed by "::" must be a class or namespace name'
+    // with the Intel compiler and a noexcept function here,
+    // try to use noexcept(true) instead of plain noexcept.
     using type = typename remove_class<decltype(&F::operator())>::type;
 };
 
@@ -687,8 +693,10 @@
 /// Ignore that a variable is unused in compiler warnings
 inline void ignore_unused(const int *) { }
 
+// [workaround(intel)] Internal error on fold expression
 /// Apply a function over each element of a parameter pack
-#ifdef __cpp_fold_expressions
+#if defined(__cpp_fold_expressions) && !defined(__INTEL_COMPILER)
+// Intel compiler produces an internal error on this fold expression (tested with ICC 19.0.2)
 #define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) (((PATTERN), void()), ...)
 #else
 using expand_side_effects = bool[];
diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h
index a455715..75fcd3c 100644
--- a/include/pybind11/detail/internals.h
+++ b/include/pybind11/detail/internals.h
@@ -112,7 +112,7 @@
     PyInterpreterState *istate = nullptr;
     ~internals() {
         // This destructor is called *after* Py_Finalize() in finalize_interpreter().
-        // That *SHOULD BE* fine. The following details what happens whe PyThread_tss_free is called.
+        // That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is called.
         // PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does nothing.
         // PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree.
         // PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). Neither
@@ -266,7 +266,7 @@
         const PyGILState_STATE state;
     } gil;
 
-    constexpr auto *id = PYBIND11_INTERNALS_ID;
+    PYBIND11_STR_TYPE id(PYBIND11_INTERNALS_ID);
     auto builtins = handle(PyEval_GetBuiltins());
     if (builtins.contains(id) && isinstance<capsule>(builtins[id])) {
         internals_pp = static_cast<internals **>(capsule(builtins[id]));
diff --git a/include/pybind11/iostream.h b/include/pybind11/iostream.h
index 5e9a814..9dee755 100644
--- a/include/pybind11/iostream.h
+++ b/include/pybind11/iostream.h
@@ -43,16 +43,20 @@
     // simplified to a fully qualified call.
     int _sync() {
         if (pbase() != pptr()) {
-            // This subtraction cannot be negative, so dropping the sign
-            str line(pbase(), static_cast<size_t>(pptr() - pbase()));
 
             {
                 gil_scoped_acquire tmp;
+
+                // This subtraction cannot be negative, so dropping the sign.
+                str line(pbase(), static_cast<size_t>(pptr() - pbase()));
+
                 pywrite(line);
                 pyflush();
+
+                // Placed inside gil_scoped_acquire as a mutex to avoid a race
+                setp(pbase(), epptr());
             }
 
-            setp(pbase(), epptr());
         }
         return 0;
     }
@@ -102,8 +106,8 @@
     .. code-block:: cpp
 
         {
-            py::scoped_ostream_redirect output{std::cerr, py::module_::import("sys").attr("stderr")};
-            std::cerr << "Hello, World!";
+            py::scoped_ostream_redirect output{std::cerr, py::module_::import("sys").attr("stderr")};
+            std::cerr << "Hello, World!";
         }
  \endrst */
 class scoped_ostream_redirect {
diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h
index e2ddda0..3bffbb2 100644
--- a/include/pybind11/pybind11.h
+++ b/include/pybind11/pybind11.h
@@ -114,9 +114,16 @@
     object name() const { return attr("__name__"); }
 
 protected:
+    struct InitializingFunctionRecordDeleter {
+        // `destruct(function_record, false)`: `initialize_generic` copies strings and
+        // takes care of cleaning up in case of exceptions. So pass `false` to `free_strings`.
+        void operator()(detail::function_record * rec) { destruct(rec, false); }
+    };
+    using unique_function_record = std::unique_ptr<detail::function_record, InitializingFunctionRecordDeleter>;
+
     /// Space optimization: don't inline this frequently instantiated fragment
-    PYBIND11_NOINLINE detail::function_record *make_function_record() {
-        return new detail::function_record();
+    PYBIND11_NOINLINE unique_function_record make_function_record() {
+        return unique_function_record(new detail::function_record());
     }
 
     /// Special internal constructor for functors, lambda functions, etc.
@@ -126,7 +133,9 @@
         struct capture { remove_reference_t<Func> f; };
 
         /* Store the function including any extra state it might have (e.g. a lambda capture object) */
-        auto rec = make_function_record();
+        // The unique_ptr makes sure nothing is leaked in case of an exception.
+        auto unique_rec = make_function_record();
+        auto rec = unique_rec.get();
 
         /* Store the capture object directly in the function record if there is enough space */
         if (sizeof(capture) <= sizeof(rec->data)) {
@@ -207,7 +216,8 @@
         PYBIND11_DESCR_CONSTEXPR auto types = decltype(signature)::types();
 
         /* Register the function with Python from generic (non-templated) code */
-        initialize_generic(rec, signature.text, types.data(), sizeof...(Args));
+        // Pass on the ownership over the `unique_rec` to `initialize_generic`. `rec` stays valid.
+        initialize_generic(std::move(unique_rec), signature.text, types.data(), sizeof...(Args));
 
         if (cast_in::has_args) rec->has_args = true;
         if (cast_in::has_kwargs) rec->has_kwargs = true;
@@ -223,20 +233,51 @@
         }
     }
 
+    // Utility class that keeps track of all duplicated strings, and cleans them up in its destructor,
+    // unless they are released. Basically a RAII-solution to deal with exceptions along the way.
+    class strdup_guard {
+    public:
+        ~strdup_guard() {
+            for (auto s : strings)
+                std::free(s); // `free(nullptr)` is a no-op, so an unfilled slot is harmless
+        }
+        char *operator()(const char *s) {
+            // Grow the vector *before* duplicating: if `push_back` throws, nothing has been
+            // allocated yet, so a copy can never be lost between `strdup` and being tracked.
+            strings.push_back(nullptr);
+            return strings.back() = strdup(s);
+        }
+        // Forget every tracked string without freeing it (ownership was handed over elsewhere).
+        void release() { strings.clear(); }
+    private:
+        std::vector<char *> strings;
+    };
+
     /// Register a function call with Python (generic non-templated code goes here)
-    void initialize_generic(detail::function_record *rec, const char *text,
+    void initialize_generic(unique_function_record &&unique_rec, const char *text,
                             const std::type_info *const *types, size_t args) {
+        // Do NOT receive `unique_rec` by value. If this function fails to move out the unique_ptr,
+        // we do not want this to destruct the pointer. `initialize` (the caller) still relies on the
+        // pointee being alive after this call. Only move out if a `capsule` is going to keep it alive.
+        auto rec = unique_rec.get();
+
+        // Keep track of strdup'ed strings, and clean them up as long as the function's capsule
+        // has not taken ownership yet (when `unique_rec.release()` is called).
+        // Note: This cannot easily be fixed by a `unique_ptr` with custom deleter, because the strings
+        // are only referenced before strdup'ing. So only *after* the following block could `destruct`
+        // safely be called, but even then, `repr` could still throw in the middle of copying all strings.
+        strdup_guard guarded_strdup;
 
         /* Create copies of all referenced C-style strings */
-        rec->name = strdup(rec->name ? rec->name : "");
-        if (rec->doc) rec->doc = strdup(rec->doc);
+        rec->name = guarded_strdup(rec->name ? rec->name : "");
+        if (rec->doc) rec->doc = guarded_strdup(rec->doc);
         for (auto &a: rec->args) {
             if (a.name)
-                a.name = strdup(a.name);
+                a.name = guarded_strdup(a.name);
             if (a.descr)
-                a.descr = strdup(a.descr);
+                a.descr = guarded_strdup(a.descr);
             else if (a.value)
-                a.descr = strdup(repr(a.value).cast<std::string>().c_str());
+                a.descr = guarded_strdup(repr(a.value).cast<std::string>().c_str());
         }
 
         rec->is_constructor = !strcmp(rec->name, "__init__") || !strcmp(rec->name, "__setstate__");
@@ -319,13 +360,13 @@
 #if PY_MAJOR_VERSION < 3
         if (strcmp(rec->name, "__next__") == 0) {
             std::free(rec->name);
-            rec->name = strdup("next");
+            rec->name = guarded_strdup("next");
         } else if (strcmp(rec->name, "__bool__") == 0) {
             std::free(rec->name);
-            rec->name = strdup("__nonzero__");
+            rec->name = guarded_strdup("__nonzero__");
         }
 #endif
-        rec->signature = strdup(signature.c_str());
+        rec->signature = guarded_strdup(signature.c_str());
         rec->args.shrink_to_fit();
         rec->nargs = (std::uint16_t) args;
 
@@ -356,9 +397,10 @@
             rec->def->ml_meth = reinterpret_cast<PyCFunction>(reinterpret_cast<void (*) (void)>(*dispatcher));
             rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS;
 
-            capsule rec_capsule(rec, [](void *ptr) {
+            capsule rec_capsule(unique_rec.release(), [](void *ptr) {
                 destruct((detail::function_record *) ptr);
             });
+            guarded_strdup.release();
 
             object scope_module;
             if (rec->scope) {
@@ -393,13 +435,15 @@
                 chain_start = rec;
                 rec->next = chain;
                 auto rec_capsule = reinterpret_borrow<capsule>(((PyCFunctionObject *) m_ptr)->m_self);
-                rec_capsule.set_pointer(rec);
+                rec_capsule.set_pointer(unique_rec.release());
+                guarded_strdup.release();
             } else {
                 // Or end of chain (normal behavior)
                 chain_start = chain;
                 while (chain->next)
                     chain = chain->next;
-                chain->next = rec;
+                chain->next = unique_rec.release();
+                guarded_strdup.release();
             }
         }
 
@@ -439,9 +483,9 @@
 
         /* Install docstring */
         auto *func = (PyCFunctionObject *) m_ptr;
-        if (func->m_ml->ml_doc)
-            std::free(const_cast<char *>(func->m_ml->ml_doc));
-        func->m_ml->ml_doc = strdup(signatures.c_str());
+        std::free(const_cast<char *>(func->m_ml->ml_doc));
+        // Install docstring if it's non-empty (when at least one option is enabled)
+        func->m_ml->ml_doc = signatures.empty() ? nullptr : strdup(signatures.c_str());
 
         if (rec->is_method) {
             m_ptr = PYBIND11_INSTANCE_METHOD_NEW(m_ptr, rec->scope.ptr());
@@ -452,7 +496,7 @@
     }
 
     /// When a cpp_function is GCed, release any memory allocated by pybind11
-    static void destruct(detail::function_record *rec) {
+    static void destruct(detail::function_record *rec, bool free_strings = true) {
         // If on Python 3.9, check the interpreter "MICRO" (patch) version.
         // If this is running on 3.9.0, we have to work around a bug.
         #if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9
@@ -463,14 +507,20 @@
             detail::function_record *next = rec->next;
             if (rec->free_data)
                 rec->free_data(rec);
-            std::free((char *) rec->name);
-            std::free((char *) rec->doc);
-            std::free((char *) rec->signature);
-            for (auto &arg: rec->args) {
-                std::free(const_cast<char *>(arg.name));
-                std::free(const_cast<char *>(arg.descr));
-                arg.value.dec_ref();
+            // During initialization, these strings might not have been copied yet,
+            // so they cannot be freed. Once the function has been created, they can.
+            // Check `make_function_record` for more details.
+            if (free_strings) {
+                std::free((char *) rec->name);
+                std::free((char *) rec->doc);
+                std::free((char *) rec->signature);
+                for (auto &arg: rec->args) {
+                    std::free(const_cast<char *>(arg.name));
+                    std::free(const_cast<char *>(arg.descr));
+                }
             }
+            for (auto &arg: rec->args)
+                arg.value.dec_ref();
             if (rec->def) {
                 std::free(const_cast<char *>(rec->def->ml_doc));
                 // Python 3.9.0 decref's these in the wrong order; rec->def
@@ -504,15 +554,15 @@
 
         auto self_value_and_holder = value_and_holder();
         if (overloads->is_constructor) {
-            const auto tinfo = get_type_info((PyTypeObject *) overloads->scope.ptr());
-            const auto pi = reinterpret_cast<instance *>(parent.ptr());
-            self_value_and_holder = pi->get_value_and_holder(tinfo, false);
-
-            if (!self_value_and_holder.type || !self_value_and_holder.inst) {
+            if (!PyObject_TypeCheck(parent.ptr(), (PyTypeObject *) overloads->scope.ptr())) {
                 PyErr_SetString(PyExc_TypeError, "__init__(self, ...) called with invalid `self` argument");
                 return nullptr;
             }
 
+            const auto tinfo = get_type_info((PyTypeObject *) overloads->scope.ptr());
+            const auto pi = reinterpret_cast<instance *>(parent.ptr());
+            self_value_and_holder = pi->get_value_and_holder(tinfo, true);
+
             // If this value is already registered it must mean __init__ is invoked multiple times;
             // we really can't support that in C++, so just ignore the second __init__.
             if (self_value_and_holder.instance_registered())
@@ -977,7 +1027,7 @@
     /** \rst
         Create a new top-level module that can be used as the main module of a C extension.
 
-        For Python 3, ``def`` should point to a staticly allocated module_def.
+        For Python 3, ``def`` should point to a statically allocated module_def.
         For Python 2, ``def`` can be a nullptr and is completely ignored.
     \endrst */
     static module_ create_extension_module(const char *name, const char *doc, module_def *def) {
@@ -1005,7 +1055,7 @@
                 throw error_already_set();
             pybind11_fail("Internal error in module_::create_extension_module()");
         }
-        // TODO: Sould be reinterpret_steal for Python 3, but Python also steals it again when returned from PyInit_...
+        // TODO: Should be reinterpret_steal for Python 3, but Python also steals it again when returned from PyInit_...
         //       For Python 2, reinterpret_borrow is correct.
         return reinterpret_borrow<module_>(m);
     }
@@ -1731,6 +1781,7 @@
         m_base.init(is_arithmetic, is_convertible);
 
         def(init([](Scalar i) { return static_cast<Type>(i); }), arg("value"));
+        def_property_readonly("value", [](Type value) { return (Scalar) value; });
         def("__int__", [](Type value) { return (Scalar) value; });
         #if PY_MAJOR_VERSION < 3
             def("__long__", [](Type value) { return (Scalar) value; });
@@ -1909,7 +1960,7 @@
 template <typename InputType, typename OutputType> void implicitly_convertible() {
     struct set_flag {
         bool &flag;
-        set_flag(bool &flag) : flag(flag) { flag = true; }
+        set_flag(bool &flag_) : flag(flag_) { flag_ = true; }
         ~set_flag() { flag = false; }
     };
     auto implicit_caster = [](PyObject *obj, PyTypeObject *type) -> PyObject * {
@@ -2089,15 +2140,7 @@
         }
 
         if (release) {
-            /* Work around an annoying assertion in PyThreadState_Swap */
-            #if defined(Py_DEBUG)
-                PyInterpreterState *interp = tstate->interp;
-                tstate->interp = nullptr;
-            #endif
             PyEval_AcquireThread(tstate);
-            #if defined(Py_DEBUG)
-                tstate->interp = interp;
-            #endif
         }
 
         inc_ref();
@@ -2121,12 +2164,22 @@
                     pybind11_fail("scoped_acquire::dec_ref(): internal error!");
             #endif
             PyThreadState_Clear(tstate);
-            PyThreadState_DeleteCurrent();
+            if (active)
+                PyThreadState_DeleteCurrent();
             PYBIND11_TLS_DELETE_VALUE(detail::get_internals().tstate);
             release = false;
         }
     }
 
+    /// This method will disable the PyThreadState_DeleteCurrent call and the
+    /// GIL won't be acquired. This method should be used if the interpreter
+    /// could be shutting down when this is called, as thread deletion is not
+    /// allowed during shutdown. Check _Py_IsFinalizing() on Python 3.7+, and
+    /// protect subsequent code.
+    PYBIND11_NOINLINE void disarm() {
+        active = false;
+    }
+
     PYBIND11_NOINLINE ~gil_scoped_acquire() {
         dec_ref();
         if (release)
@@ -2135,6 +2188,7 @@
 private:
     PyThreadState *tstate = nullptr;
     bool release = true;
+    bool active = true;
 };
 
 class gil_scoped_release {
@@ -2150,10 +2204,22 @@
             PYBIND11_TLS_DELETE_VALUE(key);
         }
     }
+
+    /// This method will disable the `PyEval_RestoreThread` call in the
+    /// destructor, so the GIL won't be re-acquired. This method should be used
+    /// if the interpreter could be shutting down when this is called, as
+    /// restoring the thread state is not allowed while the runtime is
+    /// finalizing. Check _Py_IsFinalizing() on Python 3.7+, and protect subsequent code.
+    PYBIND11_NOINLINE void disarm() {
+        active = false;
+    }
+
     ~gil_scoped_release() {
         if (!tstate)
             return;
-        PyEval_RestoreThread(tstate);
+        // `PyEval_RestoreThread()` should not be called if runtime is finalizing
+        if (active)
+            PyEval_RestoreThread(tstate);
         if (disassoc) {
             auto key = detail::get_internals().tstate;
             PYBIND11_TLS_REPLACE_VALUE(key, tstate);
@@ -2162,6 +2228,7 @@
 private:
     PyThreadState *tstate;
     bool disassoc;
+    bool active = true;
 };
 #elif defined(PYPY_VERSION)
 class gil_scoped_acquire {
@@ -2169,6 +2236,7 @@
 public:
     gil_scoped_acquire() { state = PyGILState_Ensure(); }
     ~gil_scoped_acquire() { PyGILState_Release(state); }
+    void disarm() {}
 };
 
 class gil_scoped_release {
@@ -2176,10 +2244,15 @@
 public:
     gil_scoped_release() { state = PyEval_SaveThread(); }
     ~gil_scoped_release() { PyEval_RestoreThread(state); }
+    void disarm() {}
 };
 #else
-class gil_scoped_acquire { };
-class gil_scoped_release { };
+// Fallback when neither GIL implementation above is compiled in: empty stand-ins.
+// `disarm()` has to be declared `public` -- members of a `class` are private by
+// default, which would make `disarm()` uncallable in this configuration only,
+// although the other gil_scoped_acquire/gil_scoped_release variants expose it.
+class gil_scoped_acquire { public: void disarm() {} };
+class gil_scoped_release { public: void disarm() {} };
 #endif
 
 error_already_set::~error_already_set() {
@@ -2254,7 +2327,7 @@
 /** \rst
   Try to retrieve a python method by the provided name from the instance pointed to by the this_ptr.
 
-  :this_ptr: The pointer to the object the overriden method should be retrieved for. This should be
+  :this_ptr: The pointer to the object the overridden method should be retrieved for. This should be
              the first non-trampoline class encountered in the inheritance chain.
   :name: The name of the overridden Python method to retrieve.
   :return: The Python method by this name from the object or an empty function wrapper.
diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h
index 1010ad7..78db794 100644
--- a/include/pybind11/pytypes.h
+++ b/include/pybind11/pytypes.h
@@ -812,18 +812,18 @@
     : Parent(check_(o) ? o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) \
     { if (!m_ptr) throw error_already_set(); }
 
-#define PYBIND11_OBJECT_CHECK_FAILED(Name, o) \
+#define PYBIND11_OBJECT_CHECK_FAILED(Name, o_ptr) \
     ::pybind11::type_error("Object of type '" + \
-                           ::pybind11::detail::get_fully_qualified_tp_name(Py_TYPE(o.ptr())) + \
+                           ::pybind11::detail::get_fully_qualified_tp_name(Py_TYPE(o_ptr)) + \
                            "' is not an instance of '" #Name "'")
 
 #define PYBIND11_OBJECT(Name, Parent, CheckFun) \
     PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
     /* This is deliberately not 'explicit' to allow implicit conversion from object: */ \
     Name(const object &o) : Parent(o) \
-    { if (o && !check_(o)) throw PYBIND11_OBJECT_CHECK_FAILED(Name, o); } \
+    { if (m_ptr && !check_(m_ptr)) throw PYBIND11_OBJECT_CHECK_FAILED(Name, m_ptr); } \
     Name(object &&o) : Parent(std::move(o)) \
-    { if (o && !check_(o)) throw PYBIND11_OBJECT_CHECK_FAILED(Name, o); }
+    { if (m_ptr && !check_(m_ptr)) throw PYBIND11_OBJECT_CHECK_FAILED(Name, m_ptr); }
 
 #define PYBIND11_OBJECT_DEFAULT(Name, Parent, CheckFun) \
     PYBIND11_OBJECT(Name, Parent, CheckFun) \
@@ -1271,6 +1271,15 @@
     detail::tuple_iterator end() const { return {*this, PyTuple_GET_SIZE(m_ptr)}; }
 };
 
+// We need to put this into a separate function because the Intel compiler
+// fails to compile enable_if_t<all_of<is_keyword_or_ds<Args>...>::value> part below
+// (tested with ICC 2021.1 Beta 20200827).
+template <typename... Args>
+constexpr bool args_are_all_keyword_or_ds()
+{
+  return detail::all_of<detail::is_keyword_or_ds<Args>...>::value;
+}
+
 class dict : public object {
 public:
     PYBIND11_OBJECT_CVT(dict, object, PyDict_Check, raw_dict)
@@ -1278,7 +1287,7 @@
         if (!m_ptr) pybind11_fail("Could not allocate dict object!");
     }
     template <typename... Args,
-              typename = detail::enable_if_t<detail::all_of<detail::is_keyword_or_ds<Args>...>::value>,
+              typename = detail::enable_if_t<args_are_all_keyword_or_ds<Args...>()>,
               // MSVC workaround: it can't compile an out-of-line definition, so defer the collector
               typename collector = detail::deferred_t<detail::unpacking_collector<>, Args...>>
     explicit dict(Args &&...args) : dict(collector(std::forward<Args>(args)...).kwargs()) { }
diff --git a/include/pybind11/stl_bind.h b/include/pybind11/stl_bind.h
index 9d8ed0c..83195ee 100644
--- a/include/pybind11/stl_bind.h
+++ b/include/pybind11/stl_bind.h
@@ -375,10 +375,20 @@
 template <typename Vector>
 struct vector_has_data_and_format<Vector, enable_if_t<std::is_same<decltype(format_descriptor<typename Vector::value_type>::format(), std::declval<Vector>().data()), typename Vector::value_type*>::value>> : std::true_type {};
 
+// [workaround(intel)] Separate function required here
+// Workaround as the Intel compiler does not compile the enable_if_t part below
+// (tested with icc (ICC) 2021.1 Beta 20200827)
+template <typename... Args>
+constexpr bool args_any_are_buffer() {
+    return detail::any_of<std::is_same<Args, buffer_protocol>...>::value;
+}
+
+// [workaround(intel)] Separate function required here
+// [workaround(msvc)] Can't use constexpr bool in return type
+
 // Add the buffer interface to a vector
 template <typename Vector, typename Class_, typename... Args>
-enable_if_t<detail::any_of<std::is_same<Args, buffer_protocol>...>::value>
-vector_buffer(Class_& cl) {
+void vector_buffer_impl(Class_& cl, std::true_type) {
     using T = typename Vector::value_type;
 
     static_assert(vector_has_data_and_format<Vector>::value, "There is not an appropriate format descriptor for this vector");
@@ -416,7 +426,12 @@
 }
 
 template <typename Vector, typename Class_, typename... Args>
-enable_if_t<!detail::any_of<std::is_same<Args, buffer_protocol>...>::value> vector_buffer(Class_&) {}
+void vector_buffer_impl(Class_&, std::false_type) {}
+
+template <typename Vector, typename Class_, typename... Args>
+void vector_buffer(Class_& cl) {
+    vector_buffer_impl<Vector, Class_, Args...>(cl, detail::any_of<std::is_same<Args, buffer_protocol>...>{});
+}
 
 PYBIND11_NAMESPACE_END(detail)
 
diff --git a/pybind11/_version.py b/pybind11/_version.py
index d18535c..f8b795e 100644
--- a/pybind11/_version.py
+++ b/pybind11/_version.py
@@ -8,5 +8,5 @@
         return s
 
 
-__version__ = "2.6.1"
+__version__ = "2.6.2"
 version_info = tuple(_to_int(s) for s in __version__.split("."))
diff --git a/pybind11/setup_helpers.py b/pybind11/setup_helpers.py
index 33605dd..c69064c 100644
--- a/pybind11/setup_helpers.py
+++ b/pybind11/setup_helpers.py
@@ -99,15 +99,14 @@
     this is an ugly old-style class due to Distutils.
     """
 
-    def _add_cflags(self, *flags):
-        for flag in flags:
-            if flag not in self.extra_compile_args:
-                self.extra_compile_args.append(flag)
+    # flags are prepended, so that they can be further overridden, e.g. by
+    # ``extra_compile_args=["-g"]``.
 
-    def _add_lflags(self, *flags):
-        for flag in flags:
-            if flag not in self.extra_link_args:
-                self.extra_link_args.append(flag)
+    def _add_cflags(self, flags):
+        self.extra_compile_args[:0] = flags
+
+    def _add_ldflags(self, flags):
+        self.extra_link_args[:0] = flags
 
     def __init__(self, *args, **kwargs):
 
@@ -139,13 +138,17 @@
         # Have to use the accessor manually to support Python 2 distutils
         Pybind11Extension.cxx_std.__set__(self, cxx_std)
 
+        cflags = []
+        ldflags = []
         if WIN:
-            self._add_cflags("/EHsc", "/bigobj")
+            cflags += ["/EHsc", "/bigobj"]
         else:
-            self._add_cflags("-fvisibility=hidden", "-g0")
+            cflags += ["-fvisibility=hidden", "-g0"]
             if MACOS:
-                self._add_cflags("-stdlib=libc++")
-                self._add_lflags("-stdlib=libc++")
+                cflags += ["-stdlib=libc++"]
+                ldflags += ["-stdlib=libc++"]
+        self._add_cflags(cflags)
+        self._add_ldflags(ldflags)
 
     @property
     def cxx_std(self):
@@ -174,7 +177,8 @@
         if not level:
             return
 
-        self.extra_compile_args.append(STD_TMPL.format(level))
+        cflags = [STD_TMPL.format(level)]
+        ldflags = []
 
         if MACOS and "MACOSX_DEPLOYMENT_TARGET" not in os.environ:
             # C++17 requires a higher min version of macOS. An earlier version
@@ -186,18 +190,21 @@
             desired_macos = (10, 9) if level < 17 else (10, 14)
             macos_string = ".".join(str(x) for x in min(current_macos, desired_macos))
             macosx_min = "-mmacosx-version-min=" + macos_string
-            self.extra_compile_args.append(macosx_min)
-            self.extra_link_args.append(macosx_min)
+            cflags += [macosx_min]
+            ldflags += [macosx_min]
 
         if PY2:
             if WIN:
                 # Will be ignored on MSVC 2015, where C++17 is not supported so
                 # this flag is not valid.
-                self.extra_compile_args.append("/wd5033")
+                cflags += ["/wd5033"]
             elif level >= 17:
-                self.extra_compile_args.append("-Wno-register")
+                cflags += ["-Wno-register"]
             elif level >= 14:
-                self.extra_compile_args.append("-Wno-deprecated-register")
+                cflags += ["-Wno-deprecated-register"]
+
+        self._add_cflags(cflags)
+        self._add_ldflags(ldflags)
 
 
 # Just in case someone clever tries to multithread
@@ -232,7 +239,8 @@
     with tmp_chdir():
         fname = "flagcheck.cpp"
         with open(fname, "w") as f:
-            f.write("int main (int argc, char **argv) { return 0; }")
+            # Don't trigger -Wunused-parameter.
+            f.write("int main (int, char **) { return 0; }")
 
         try:
             compiler.compile([fname], extra_postargs=[flag])
diff --git a/setup.cfg b/setup.cfg
index e7fc8f4..041e62d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -43,12 +43,7 @@
     docs/**
     tools/**
     include/**
-    .appveyor.yml
-    .cmake-format.yaml
-    .gitmodules
-    .pre-commit-config.yaml
-    .readthedocs.yml
-    .clang-tidy
+    .*
     pybind11/include/**
     pybind11/share/**
     CMakeLists.txt
@@ -68,4 +63,23 @@
 
 [mypy]
 files = pybind11
-strict = True
+python_version = 2.7
+warn_unused_configs = True
+
+# Currently (0.800) identical to --strict
+disallow_any_generics = True
+disallow_subclassing_any = True
+disallow_untyped_calls = True
+disallow_untyped_defs = True
+disallow_incomplete_defs = True
+check_untyped_defs = True
+disallow_untyped_decorators = True
+no_implicit_optional = True
+warn_redundant_casts = True
+warn_unused_ignores = True
+warn_return_any = True
+no_implicit_reexport = True
+strict_equality = True
+
+[tool:pytest]
+timeout = 300
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index dae8b5a..3bfd5f1 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -24,7 +24,7 @@
 # Usage:
 #   pybind11_filter_tests(LISTNAME file1.cpp file2.cpp ... MESSAGE "")
 #
-macro(PYBIND11_FILTER_TESTS LISTNAME)
+macro(pybind11_filter_tests LISTNAME)
   cmake_parse_arguments(ARG "" "MESSAGE" "" ${ARGN})
   set(PYBIND11_FILTER_TESTS_FOUND OFF)
   foreach(filename IN LISTS ARG_UNPARSED_ARGUMENTS)
@@ -39,6 +39,14 @@
   endif()
 endmacro()
 
+macro(possibly_uninitialized)
+  foreach(VARNAME ${ARGN})
+    if(NOT DEFINED "${VARNAME}")
+      set("${VARNAME}" "")
+    endif()
+  endforeach()
+endmacro()
+
 # New Python support
 if(DEFINED Python_EXECUTABLE)
   set(PYTHON_EXECUTABLE "${Python_EXECUTABLE}")
@@ -67,7 +75,7 @@
   find_package(pybind11 REQUIRED CONFIG)
 endif()
 
-if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+if(NOT CMAKE_BUILD_TYPE AND NOT DEFINED CMAKE_CONFIGURATION_TYPES)
   message(STATUS "Setting tests build type to MinSizeRel as none was specified")
   set(CMAKE_BUILD_TYPE
       MinSizeRel
@@ -345,11 +353,14 @@
   if(NOT CMAKE_LIBRARY_OUTPUT_DIRECTORY)
     set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
                                                "${CMAKE_CURRENT_BINARY_DIR}")
-    foreach(config ${CMAKE_CONFIGURATION_TYPES})
-      string(TOUPPER ${config} config)
-      set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${config}
-                                                 "${CMAKE_CURRENT_BINARY_DIR}")
-    endforeach()
+
+    if(DEFINED CMAKE_CONFIGURATION_TYPES)
+      foreach(config ${CMAKE_CONFIGURATION_TYPES})
+        string(TOUPPER ${config} config)
+        set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${config}
+                                                   "${CMAKE_CURRENT_BINARY_DIR}")
+      endforeach()
+    endif()
   endif()
 endforeach()
 
@@ -370,12 +381,17 @@
 string(REPLACE "test_" "${CMAKE_CURRENT_SOURCE_DIR}/test_" PYBIND11_ABS_PYTEST_FILES
                "${PYBIND11_PYTEST_FILES}")
 
+set(PYBIND11_TEST_PREFIX_COMMAND
+    ""
+    CACHE STRING "Put this before pytest, use for checkers and such")
+
 # A single command to compile and run the tests
 add_custom_target(
   pytest
-  COMMAND ${PYTHON_EXECUTABLE} -m pytest ${PYBIND11_ABS_PYTEST_FILES}
+  COMMAND ${PYBIND11_TEST_PREFIX_COMMAND} ${PYTHON_EXECUTABLE} -m pytest
+          ${PYBIND11_ABS_PYTEST_FILES}
   DEPENDS ${test_targets}
-  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+  WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
   USES_TERMINAL)
 
 if(PYBIND11_TEST_OVERRIDE)
@@ -386,6 +402,27 @@
             "Note: not all tests run: -DPYBIND11_TEST_OVERRIDE is in effect")
 endif()
 
+# cmake-format: off
+add_custom_target(
+  memcheck
+  COMMAND
+    PYTHONMALLOC=malloc
+    valgrind
+    --leak-check=full
+    --show-leak-kinds=definite,indirect
+    --errors-for-leak-kinds=definite,indirect
+    --error-exitcode=1
+    --read-var-info=yes
+    --track-origins=yes
+    --suppressions="${CMAKE_CURRENT_SOURCE_DIR}/valgrind-python.supp"
+    --suppressions="${CMAKE_CURRENT_SOURCE_DIR}/valgrind-numpy-scipy.supp"
+    --gen-suppressions=all
+    ${PYTHON_EXECUTABLE} -m pytest ${PYBIND11_ABS_PYTEST_FILES}
+  DEPENDS ${test_targets}
+  WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+  USES_TERMINAL)
+# cmake-format: on
+
 # Add a check target to run all the tests, starting with pytest (we add dependencies to this below)
 add_custom_target(check DEPENDS pytest)
 
diff --git a/tests/pybind11_tests.h b/tests/pybind11_tests.h
index 4ff56c0..ccb0529 100644
--- a/tests/pybind11_tests.h
+++ b/tests/pybind11_tests.h
@@ -1,5 +1,6 @@
 #pragma once
 #include <pybind11/pybind11.h>
+#include <pybind11/eval.h>
 
 #if defined(_MSC_VER) && _MSC_VER < 1910
 // We get some really long type names here which causes MSVC 2015 to emit warnings
@@ -69,3 +70,15 @@
 };
 PYBIND11_NAMESPACE_END(detail)
 PYBIND11_NAMESPACE_END(pybind11)
+
+template <typename F>
+void ignoreOldStyleInitWarnings(F &&body) {
+    py::exec(R"(
+    message = "pybind11-bound class '.+' is using an old-style placement-new '(?:__init__|__setstate__)' which has been deprecated"
+
+    import warnings
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", message=message, category=FutureWarning)
+        body()
+    )", py::dict(py::arg("body") = py::cpp_function(body)));
+}
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 80ed617..8768fc1 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -3,6 +3,8 @@
 numpy==1.18.0; platform_python_implementation=="PyPy" and sys_platform=="darwin" and python_version>="3.6"
 numpy==1.19.3; (platform_python_implementation!="PyPy" or sys_platform=="linux") and python_version>="3.6" and python_version<"3.10"
 pytest==4.6.9; python_version<"3.5"
-pytest==5.4.3; python_version>="3.5"
+pytest==6.1.2; python_version=="3.5"
+pytest==6.2.1; python_version>="3.6"
+pytest-timeout
 scipy==1.2.3; (platform_python_implementation!="PyPy" or sys_platform=="linux") and python_version<"3.6"
-scipy==1.5.2; (platform_python_implementation!="PyPy" or sys_platform=="linux") and python_version>="3.6" and python_version<"3.9"
+scipy==1.5.4; (platform_python_implementation!="PyPy" or sys_platform=="linux") and python_version>="3.6" and python_version<"3.10"
diff --git a/tests/test_buffers.py b/tests/test_buffers.py
index f0f3708..5084575 100644
--- a/tests/test_buffers.py
+++ b/tests/test_buffers.py
@@ -92,6 +92,8 @@
     view = memoryview(buf)
     assert view[0] == b"d" if env.PY2 else 0x64
     assert view.readonly
+    with pytest.raises(TypeError):
+        view[0] = b"\0" if env.PY2 else 0
 
 
 def test_selective_readonly_buffer():
diff --git a/tests/test_builtin_casters.cpp b/tests/test_builtin_casters.cpp
index acc9f8f..f4e7756 100644
--- a/tests/test_builtin_casters.cpp
+++ b/tests/test_builtin_casters.cpp
@@ -15,6 +15,49 @@
 #  pragma warning(disable: 4127) // warning C4127: Conditional expression is constant
 #endif
 
+struct ConstRefCasted {
+  int tag;
+};
+
+PYBIND11_NAMESPACE_BEGIN(pybind11)
+PYBIND11_NAMESPACE_BEGIN(detail)
+template <>
+class type_caster<ConstRefCasted> {
+ public:
+  static constexpr auto name = _<ConstRefCasted>();
+
+  // Input is unimportant, a new value will always be constructed based on the
+  // cast operator.
+  bool load(handle, bool) { return true; }
+
+  operator ConstRefCasted&&() { value = {1}; return std::move(value); }
+  operator ConstRefCasted&() { value = {2}; return value; }
+  operator ConstRefCasted*() { value = {3}; return &value; }
+
+  operator const ConstRefCasted&() { value = {4}; return value; }
+  operator const ConstRefCasted*() { value = {5}; return &value; }
+
+  // custom cast_op to explicitly propagate types to the conversion operators.
+  template <typename T_>
+  using cast_op_type =
+      /// const
+      conditional_t<
+          std::is_same<remove_reference_t<T_>, const ConstRefCasted*>::value, const ConstRefCasted*,
+      conditional_t<
+          std::is_same<T_, const ConstRefCasted&>::value, const ConstRefCasted&,
+      /// non-const
+      conditional_t<
+          std::is_same<remove_reference_t<T_>, ConstRefCasted*>::value, ConstRefCasted*,
+      conditional_t<
+          std::is_same<T_, ConstRefCasted&>::value, ConstRefCasted&,
+          /* else */ConstRefCasted&&>>>>;
+
+ private:
+  ConstRefCasted value = {0};
+};
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(pybind11)
+
 TEST_SUBMODULE(builtin_casters, m) {
     // test_simple_string
     m.def("string_roundtrip", [](const char *s) { return s; });
@@ -98,6 +141,10 @@
     m.def("i64_str", [](std::int64_t v) { return std::to_string(v); });
     m.def("u64_str", [](std::uint64_t v) { return std::to_string(v); });
 
+    // test_int_convert
+    m.def("int_passthrough", [](int arg) { return arg; });
+    m.def("int_passthrough_noconvert", [](int arg) { return arg; }, py::arg{}.noconvert());
+
     // test_tuple
     m.def("pair_passthrough", [](std::pair<bool, std::string> input) {
         return std::make_pair(input.second, input.first);
@@ -140,13 +187,35 @@
     m.def("load_nullptr_t", [](std::nullptr_t) {}); // not useful, but it should still compile
     m.def("cast_nullptr_t", []() { return std::nullptr_t{}; });
 
+    // [workaround(intel)] ICC 20/21 breaks with py::arg().stuff, using py::arg{}.stuff works.
+
     // test_bool_caster
     m.def("bool_passthrough", [](bool arg) { return arg; });
-    m.def("bool_passthrough_noconvert", [](bool arg) { return arg; }, py::arg().noconvert());
+    m.def("bool_passthrough_noconvert", [](bool arg) { return arg; }, py::arg{}.noconvert());
+
+    // TODO: Remove this guard once a fixed Intel compiler release is available
+#if !defined(__INTEL_COMPILER)
+    // Test "bool_passthrough_noconvert" again, but using () instead of {} to construct py::arg
+    // When compiled with the Intel compiler, this results in segmentation faults when importing
+    // the module. Tested with icc (ICC) 2021.1 Beta 20200827, this should be tested again when
+    // a newer version of icc is available.
+    m.def("bool_passthrough_noconvert2", [](bool arg) { return arg; }, py::arg().noconvert());
+#endif
 
     // test_reference_wrapper
     m.def("refwrap_builtin", [](std::reference_wrapper<int> p) { return 10 * p.get(); });
     m.def("refwrap_usertype", [](std::reference_wrapper<UserType> p) { return p.get().value(); });
+    m.def("refwrap_usertype_const", [](std::reference_wrapper<const UserType> p) { return p.get().value(); });
+
+    m.def("refwrap_lvalue", []() -> std::reference_wrapper<UserType> {
+        static UserType x(1);
+        return std::ref(x);
+    });
+    m.def("refwrap_lvalue_const", []() -> std::reference_wrapper<const UserType> {
+        static UserType x(1);
+        return std::cref(x);
+    });
+
     // Not currently supported (std::pair caster has return-by-value cast operator);
     // triggers static_assert failure.
     //m.def("refwrap_pair", [](std::reference_wrapper<std::pair<int, int>>) { });
@@ -189,4 +258,14 @@
         py::object o = py::cast(v);
         return py::cast<void *>(o) == v;
     });
+
+    // Tests const/non-const propagation in cast_op.
+    m.def("takes", [](ConstRefCasted x) { return x.tag; });
+    m.def("takes_move", [](ConstRefCasted&& x) { return x.tag; });
+    m.def("takes_ptr", [](ConstRefCasted* x) { return x->tag; });
+    m.def("takes_ref", [](ConstRefCasted& x) { return x.tag; });
+    m.def("takes_ref_wrap", [](std::reference_wrapper<ConstRefCasted> x) { return x.get().tag; });
+    m.def("takes_const_ptr", [](const ConstRefCasted* x) { return x->tag; });
+    m.def("takes_const_ref", [](const ConstRefCasted& x) { return x.tag; });
+    m.def("takes_const_ref_wrap", [](std::reference_wrapper<const ConstRefCasted> x) { return x.get().tag; });
 }
diff --git a/tests/test_builtin_casters.py b/tests/test_builtin_casters.py
index bd7996b..cb37dbc 100644
--- a/tests/test_builtin_casters.py
+++ b/tests/test_builtin_casters.py
@@ -251,6 +251,88 @@
         assert "incompatible function arguments" in str(excinfo.value)
 
 
+def test_int_convert():
+    class Int(object):
+        def __int__(self):
+            return 42
+
+    class NotInt(object):
+        pass
+
+    class Float(object):
+        def __float__(self):
+            return 41.99999
+
+    class Index(object):
+        def __index__(self):
+            return 42
+
+    class IntAndIndex(object):
+        def __int__(self):
+            return 42
+
+        def __index__(self):
+            return 0
+
+    class RaisingTypeErrorOnIndex(object):
+        def __index__(self):
+            raise TypeError
+
+        def __int__(self):
+            return 42
+
+    class RaisingValueErrorOnIndex(object):
+        def __index__(self):
+            raise ValueError
+
+        def __int__(self):
+            return 42
+
+    convert, noconvert = m.int_passthrough, m.int_passthrough_noconvert
+
+    def requires_conversion(v):
+        pytest.raises(TypeError, noconvert, v)
+
+    def cant_convert(v):
+        pytest.raises(TypeError, convert, v)
+
+    assert convert(7) == 7
+    assert noconvert(7) == 7
+    cant_convert(3.14159)
+    assert convert(Int()) == 42
+    requires_conversion(Int())
+    cant_convert(NotInt())
+    cant_convert(Float())
+
+    # Before Python 3.8, `PyLong_AsLong` does not pick up on `obj.__index__`,
+    # but pybind11 "backports" this behavior.
+    assert convert(Index()) == 42
+    assert noconvert(Index()) == 42
+    assert convert(IntAndIndex()) == 0  # Fishy; `int(IntAndIndex())` == 42
+    assert noconvert(IntAndIndex()) == 0
+    assert convert(RaisingTypeErrorOnIndex()) == 42
+    requires_conversion(RaisingTypeErrorOnIndex())
+    assert convert(RaisingValueErrorOnIndex()) == 42
+    requires_conversion(RaisingValueErrorOnIndex())
+
+
+def test_numpy_int_convert():
+    """NumPy scalars passed to a C++ `int` argument, with and without conversion."""
+    np = pytest.importorskip("numpy")
+    convert = m.int_passthrough
+    noconvert = m.int_passthrough_noconvert
+
+    # `np.intc` is an alias that corresponds to a C++ `int`; both functions take it.
+    for passthrough in (convert, noconvert):
+        assert passthrough(np.intc(42)) == 42
+
+    # The implicit conversion from np.float32 is undesirable but currently accepted
+    # when converting; in noconvert mode it must be rejected with a TypeError.
+    assert convert(np.float32(3.14159)) == 3
+    with pytest.raises(TypeError):
+        noconvert(np.float32(3.14159))
+
+
 def test_tuple(doc):
     """std::pair <-> tuple & std::tuple <-> tuple"""
     assert m.pair_passthrough((True, "test")) == ("test", True)
@@ -315,6 +397,7 @@
     """std::reference_wrapper for builtin and user types"""
     assert m.refwrap_builtin(42) == 420
     assert m.refwrap_usertype(UserType(42)) == 42
+    assert m.refwrap_usertype_const(UserType(42)) == 42
 
     with pytest.raises(TypeError) as excinfo:
         m.refwrap_builtin(None)
@@ -324,6 +407,9 @@
         m.refwrap_usertype(None)
     assert "incompatible function arguments" in str(excinfo.value)
 
+    assert m.refwrap_lvalue().value == 1
+    assert m.refwrap_lvalue_const().value == 1
+
     a1 = m.refwrap_list(copy=True)
     a2 = m.refwrap_list(copy=True)
     assert [x.value for x in a1] == [2, 3]
@@ -421,3 +507,21 @@
 
 def test_void_caster_2():
     assert m.test_void_caster()
+
+
+def test_const_ref_caster():
+    """Verifies that const-ref is propagated through type_caster cast_op.
+    The returned ConstRefCasted type is a minimal type that is constructed to
+    reference the casting mode used.
+    """
+    x = False
+    assert m.takes(x) == 1
+    assert m.takes_move(x) == 1
+
+    assert m.takes_ptr(x) == 3
+    assert m.takes_ref(x) == 2
+    assert m.takes_ref_wrap(x) == 2
+
+    assert m.takes_const_ptr(x) == 5
+    assert m.takes_const_ref(x) == 4
+    assert m.takes_const_ref_wrap(x) == 4
diff --git a/tests/test_callbacks.cpp b/tests/test_callbacks.cpp
index 683dfb3..dffe538 100644
--- a/tests/test_callbacks.cpp
+++ b/tests/test_callbacks.cpp
@@ -119,7 +119,10 @@
 
     class AbstractBase {
     public:
-        virtual ~AbstractBase() = default;
+        // [workaround(intel)] = default does not work here
+        // Defaulting this destructor results in linking errors with the Intel compiler
+        // (in Debug builds only, tested with icpc (ICC) 2021.1 Beta 20200827)
+        virtual ~AbstractBase() {};  // NOLINT(modernize-use-equals-default)
         virtual unsigned int func() = 0;
     };
     m.def("func_accepting_func_accepting_base", [](std::function<double(AbstractBase&)>) { });
diff --git a/tests/test_class.cpp b/tests/test_class.cpp
index 890fab7..6ce928c 100644
--- a/tests/test_class.cpp
+++ b/tests/test_class.cpp
@@ -7,6 +7,13 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
+#if defined(__INTEL_COMPILER) && __cplusplus >= 201703L
+// Intel compiler requires a separate header file to support aligned new operators
+// and does not set the __cpp_aligned_new feature macro.
+// This header needs to be included before pybind11.
+#include <aligned_new>
+#endif
+
 #include "pybind11_tests.h"
 #include "constructor_stats.h"
 #include "local_bindings.h"
@@ -231,7 +238,8 @@
         };
 
         auto def = new PyMethodDef{"f", f, METH_VARARGS, nullptr};
-        return py::reinterpret_steal<py::object>(PyCFunction_NewEx(def, nullptr, m.ptr()));
+        py::capsule def_capsule(def, [](void *ptr) { delete reinterpret_cast<PyMethodDef *>(ptr); });
+        return py::reinterpret_steal<py::object>(PyCFunction_NewEx(def, def_capsule.ptr(), m.ptr()));
     }());
 
     // test_operator_new_delete
@@ -322,6 +330,10 @@
 
     class PublicistB : public ProtectedB {
     public:
+        // [workaround(intel)] = default does not work here
+        // Removing or defaulting this destructor results in linking errors with the Intel compiler
+        // (in Debug builds only, tested with icpc (ICC) 2021.1 Beta 20200827)
+        ~PublicistB() override {};  // NOLINT(modernize-use-equals-default)
         using ProtectedB::foo;
     };
 
diff --git a/tests/test_cmake_build/CMakeLists.txt b/tests/test_cmake_build/CMakeLists.txt
index 0c0578a..8bfaa38 100644
--- a/tests/test_cmake_build/CMakeLists.txt
+++ b/tests/test_cmake_build/CMakeLists.txt
@@ -25,7 +25,7 @@
   endif()
 
   if(NOT ARG_INSTALL)
-    list(APPEND build_options "-DPYBIND11_PROJECT_DIR=${pybind11_SOURCE_DIR}")
+    list(APPEND build_options "-Dpybind11_SOURCE_DIR=${pybind11_SOURCE_DIR}")
   else()
     list(APPEND build_options "-DCMAKE_PREFIX_PATH=${pybind11_BINARY_DIR}/mock_install")
   endif()
@@ -55,6 +55,8 @@
   add_dependencies(test_cmake_build test_build_${name})
 endfunction()
 
+possibly_uninitialized(PYTHON_MODULE_EXTENSION Python_INTERPRETER_ID)
+
 pybind11_add_build_test(subdirectory_function)
 pybind11_add_build_test(subdirectory_target)
 if("${PYTHON_MODULE_EXTENSION}" MATCHES "pypy" OR "${Python_INTERPRETER_ID}" STREQUAL "PyPy")
@@ -77,3 +79,6 @@
 endif()
 
 add_dependencies(check test_cmake_build)
+
+add_subdirectory(subdirectory_target EXCLUDE_FROM_ALL)
+add_subdirectory(subdirectory_embed EXCLUDE_FROM_ALL)
diff --git a/tests/test_cmake_build/subdirectory_embed/CMakeLists.txt b/tests/test_cmake_build/subdirectory_embed/CMakeLists.txt
index c7df0cf..dfb9cb8 100644
--- a/tests/test_cmake_build/subdirectory_embed/CMakeLists.txt
+++ b/tests/test_cmake_build/subdirectory_embed/CMakeLists.txt
@@ -16,7 +16,7 @@
     CACHE BOOL "")
 set(PYBIND11_EXPORT_NAME test_export)
 
-add_subdirectory(${PYBIND11_PROJECT_DIR} pybind11)
+add_subdirectory("${pybind11_SOURCE_DIR}" pybind11)
 
 # Test basic target functionality
 add_executable(test_subdirectory_embed ../embed.cpp)
@@ -24,7 +24,7 @@
 set_target_properties(test_subdirectory_embed PROPERTIES OUTPUT_NAME test_cmake_build)
 
 add_custom_target(check_subdirectory_embed $<TARGET_FILE:test_subdirectory_embed>
-                                           ${PROJECT_SOURCE_DIR}/../test.py)
+                                           "${PROJECT_SOURCE_DIR}/../test.py")
 
 # Test custom export group -- PYBIND11_EXPORT_NAME
 add_library(test_embed_lib ../embed.cpp)
diff --git a/tests/test_cmake_build/subdirectory_function/CMakeLists.txt b/tests/test_cmake_build/subdirectory_function/CMakeLists.txt
index 624c600..34aedcf 100644
--- a/tests/test_cmake_build/subdirectory_function/CMakeLists.txt
+++ b/tests/test_cmake_build/subdirectory_function/CMakeLists.txt
@@ -11,7 +11,7 @@
 
 project(test_subdirectory_function CXX)
 
-add_subdirectory("${PYBIND11_PROJECT_DIR}" pybind11)
+add_subdirectory("${pybind11_SOURCE_DIR}" pybind11)
 pybind11_add_module(test_subdirectory_function ../main.cpp)
 set_target_properties(test_subdirectory_function PROPERTIES OUTPUT_NAME test_cmake_build)
 
diff --git a/tests/test_cmake_build/subdirectory_target/CMakeLists.txt b/tests/test_cmake_build/subdirectory_target/CMakeLists.txt
index 2471941..31d862f 100644
--- a/tests/test_cmake_build/subdirectory_target/CMakeLists.txt
+++ b/tests/test_cmake_build/subdirectory_target/CMakeLists.txt
@@ -11,7 +11,7 @@
 
 project(test_subdirectory_target CXX)
 
-add_subdirectory(${PYBIND11_PROJECT_DIR} pybind11)
+add_subdirectory("${pybind11_SOURCE_DIR}" pybind11)
 
 add_library(test_subdirectory_target MODULE ../main.cpp)
 set_target_properties(test_subdirectory_target PROPERTIES OUTPUT_NAME test_cmake_build)
diff --git a/tests/test_constants_and_functions.cpp b/tests/test_constants_and_functions.cpp
index f607795..8855dd7 100644
--- a/tests/test_constants_and_functions.cpp
+++ b/tests/test_constants_and_functions.cpp
@@ -46,7 +46,14 @@
 // Test that we properly handle C++17 exception specifiers (which are part of the function signature
 // in C++17).  These should all still work before C++17, but don't affect the function signature.
 namespace test_exc_sp {
+// [workaround(intel)] Unable to use noexcept instead of noexcept(true)
+// Make the f1 test basically the same as the f2 test in C++17 mode for the Intel compiler as
+// it fails to compile with a plain noexcept (tested with icc (ICC) 2021.1 Beta 20200827).
+#if defined(__INTEL_COMPILER) && defined(PYBIND11_CPP17)
+int f1(int x) noexcept(true) { return x+1; }
+#else
 int f1(int x) noexcept { return x+1; }
+#endif
 int f2(int x) noexcept(true) { return x+2; }
 int f3(int x) noexcept(false) { return x+3; }
 #if defined(__GNUG__)
@@ -124,4 +131,19 @@
     m.def("f2", f2);
     m.def("f3", f3);
     m.def("f4", f4);
+
+    // test_function_record_leaks
+    struct LargeCapture {
+        // This should always be enough to trigger the alternative branch
+        // where `sizeof(capture) > sizeof(rec->data)`
+        uint64_t zeros[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+    };
+    m.def("register_large_capture_with_invalid_arguments", [](py::module_ m) {
+        LargeCapture capture;  // VS 2015's MSVC is acting up if we create the array here
+        m.def("should_raise", [capture](int) { return capture.zeros[9] + 33; }, py::kw_only(), py::arg());
+    });
+    m.def("register_with_raising_repr", [](py::module_ m, py::object default_value) {
+        m.def("should_raise", [](int, int, py::object) { return 42; }, "some docstring",
+              py::arg_v("x", 42), py::arg_v("y", 42, "<the answer>"), py::arg_v("z", default_value));
+    });
 }
diff --git a/tests/test_constants_and_functions.py b/tests/test_constants_and_functions.py
index b980ccf..ff13bd0 100644
--- a/tests/test_constants_and_functions.py
+++ b/tests/test_constants_and_functions.py
@@ -40,3 +40,14 @@
     assert m.f2(53) == 55
     assert m.f3(86) == 89
     assert m.f4(140) == 144
+
+
+def test_function_record_leaks():
+    class RaisingRepr:
+        def __repr__(self):
+            raise RuntimeError("Surprise!")
+
+    with pytest.raises(RuntimeError):
+        m.register_large_capture_with_invalid_arguments(m)  # unnamed arg after kw_only
+    with pytest.raises(RuntimeError):
+        m.register_with_raising_repr(m, RaisingRepr())  # default value's repr raises
diff --git a/tests/test_copy_move.cpp b/tests/test_copy_move.cpp
index 2704217..322e9bb 100644
--- a/tests/test_copy_move.cpp
+++ b/tests/test_copy_move.cpp
@@ -214,6 +214,7 @@
     };
     py::class_<MoveIssue2>(m, "MoveIssue2").def(py::init<int>()).def_readwrite("value", &MoveIssue2::v);
 
-    m.def("get_moveissue1", [](int i) { return new MoveIssue1(i); }, py::return_value_policy::move);
+    // #2742: Don't expect ownership of raw pointer to `new`ed object to be transferred with `py::return_value_policy::move`
+    m.def("get_moveissue1", [](int i) { return std::unique_ptr<MoveIssue1>(new MoveIssue1(i)); }, py::return_value_policy::move);
     m.def("get_moveissue2", [](int i) { return MoveIssue2(i); }, py::return_value_policy::move);
 }
diff --git a/tests/test_copy_move.py b/tests/test_copy_move.py
index 7e3cc16..1d98952 100644
--- a/tests/test_copy_move.py
+++ b/tests/test_copy_move.py
@@ -119,7 +119,7 @@
 def test_move_fallback():
     """#389: rvp::move should fall-through to copy on non-movable objects"""
 
-    m2 = m.get_moveissue2(2)
-    assert m2.value == 2
     m1 = m.get_moveissue1(1)
     assert m1.value == 1
+    m2 = m.get_moveissue2(2)
+    assert m2.value == 2
diff --git a/tests/test_custom_type_casters.cpp b/tests/test_custom_type_casters.cpp
index d565add..3fe910d 100644
--- a/tests/test_custom_type_casters.cpp
+++ b/tests/test_custom_type_casters.cpp
@@ -99,19 +99,20 @@
         }
         static ArgInspector2 h(ArgInspector2 a, ArgAlwaysConverts) { return a; }
     };
+    // [workaround(intel)] ICC 20/21 breaks with py::arg().stuff, using py::arg{}.stuff works.
     py::class_<ArgInspector>(m, "ArgInspector")
         .def(py::init<>())
         .def("f", &ArgInspector::f, py::arg(), py::arg() = ArgAlwaysConverts())
         .def("g", &ArgInspector::g, "a"_a.noconvert(), "b"_a, "c"_a.noconvert()=13, "d"_a=ArgInspector2(), py::arg() = ArgAlwaysConverts())
-        .def_static("h", &ArgInspector::h, py::arg().noconvert(), py::arg() = ArgAlwaysConverts())
+        .def_static("h", &ArgInspector::h, py::arg{}.noconvert(), py::arg() = ArgAlwaysConverts())
         ;
     m.def("arg_inspect_func", [](ArgInspector2 a, ArgInspector1 b, ArgAlwaysConverts) { return a.arg + "\n" + b.arg; },
-            py::arg().noconvert(false), py::arg_v(nullptr, ArgInspector1()).noconvert(true), py::arg() = ArgAlwaysConverts());
+            py::arg{}.noconvert(false), py::arg_v(nullptr, ArgInspector1()).noconvert(true), py::arg() = ArgAlwaysConverts());
 
-    m.def("floats_preferred", [](double f) { return 0.5 * f; }, py::arg("f"));
-    m.def("floats_only", [](double f) { return 0.5 * f; }, py::arg("f").noconvert());
-    m.def("ints_preferred", [](int i) { return i / 2; }, py::arg("i"));
-    m.def("ints_only", [](int i) { return i / 2; }, py::arg("i").noconvert());
+    m.def("floats_preferred", [](double f) { return 0.5 * f; }, "f"_a);
+    m.def("floats_only", [](double f) { return 0.5 * f; }, "f"_a.noconvert());
+    m.def("ints_preferred", [](int i) { return i / 2; }, "i"_a);
+    m.def("ints_only", [](int i) { return i / 2; }, "i"_a.noconvert());
 
     // test_custom_caster_destruction
     // Test that `take_ownership` works on types with a custom type caster when given a pointer
diff --git a/tests/test_docstring_options.cpp b/tests/test_docstring_options.cpp
index 8c8f79f..8a97af5 100644
--- a/tests/test_docstring_options.cpp
+++ b/tests/test_docstring_options.cpp
@@ -48,6 +48,14 @@
     {
         py::options options;
         options.disable_user_defined_docstrings();
+        options.disable_function_signatures();
+
+        m.def("test_function8", []() {});
+    }
+
+    {
+        py::options options;
+        options.disable_user_defined_docstrings();
 
         struct DocstringTestFoo {
             int value;
diff --git a/tests/test_docstring_options.py b/tests/test_docstring_options.py
index 87d80d2..8ee6613 100644
--- a/tests/test_docstring_options.py
+++ b/tests/test_docstring_options.py
@@ -34,6 +34,9 @@
     assert m.test_function7.__doc__.startswith("test_function7(a: int, b: int) -> None")
     assert m.test_function7.__doc__.endswith("A custom docstring\n")
 
+    # when all options are disabled, no docstring (instead of an empty one) should be generated
+    assert m.test_function8.__doc__ is None
+
     # Suppression of user-defined docstrings for non-function objects
     assert not m.DocstringTestFoo.__doc__
     assert not m.DocstringTestFoo.value_prop.__doc__
diff --git a/tests/test_eigen.cpp b/tests/test_eigen.cpp
index 2cc2243..8432547 100644
--- a/tests/test_eigen.cpp
+++ b/tests/test_eigen.cpp
@@ -273,6 +273,7 @@
     m.def("cpp_ref_r", [](py::handle m) { return m.cast<Eigen::Ref<MatrixXdR>>()(1, 0); });
     m.def("cpp_ref_any", [](py::handle m) { return m.cast<py::EigenDRef<Eigen::MatrixXd>>()(1, 0); });
 
+    // [workaround(intel)] ICC 20/21 breaks with py::arg().stuff, using py::arg{}.stuff works.
 
     // test_nocopy_wrapper
     // Test that we can prevent copying into an argument that would normally copy: First a version
@@ -280,17 +281,17 @@
     m.def("get_elem", &get_elem);
     // Now this alternative that calls the tells pybind to fail rather than copy:
     m.def("get_elem_nocopy", [](Eigen::Ref<const Eigen::MatrixXd> m) -> double { return get_elem(m); },
-            py::arg().noconvert());
+            py::arg{}.noconvert());
     // Also test a row-major-only no-copy const ref:
     m.def("get_elem_rm_nocopy", [](Eigen::Ref<const Eigen::Matrix<long, -1, -1, Eigen::RowMajor>> &m) -> long { return m(2, 1); },
-            py::arg().noconvert());
+            py::arg{}.noconvert());
 
     // test_issue738
     // Issue #738: 1xN or Nx1 2D matrices were neither accepted nor properly copied with an
     // incompatible stride value on the length-1 dimension--but that should be allowed (without
     // requiring a copy!) because the stride value can be safely ignored on a size-1 dimension.
-    m.def("iss738_f1", &adjust_matrix<const Eigen::Ref<const Eigen::MatrixXd> &>, py::arg().noconvert());
-    m.def("iss738_f2", &adjust_matrix<const Eigen::Ref<const Eigen::Matrix<double, -1, -1, Eigen::RowMajor>> &>, py::arg().noconvert());
+    m.def("iss738_f1", &adjust_matrix<const Eigen::Ref<const Eigen::MatrixXd> &>, py::arg{}.noconvert());
+    m.def("iss738_f2", &adjust_matrix<const Eigen::Ref<const Eigen::Matrix<double, -1, -1, Eigen::RowMajor>> &>, py::arg{}.noconvert());
 
     // test_issue1105
     // Issue #1105: when converting from a numpy two-dimensional (Nx1) or (1xN) value into a dense
diff --git a/tests/test_embed/CMakeLists.txt b/tests/test_embed/CMakeLists.txt
index fabcb24..c960c87 100644
--- a/tests/test_embed/CMakeLists.txt
+++ b/tests/test_embed/CMakeLists.txt
@@ -1,4 +1,7 @@
+possibly_uninitialized(PYTHON_MODULE_EXTENSION Python_INTERPRETER_ID)
+
 if("${PYTHON_MODULE_EXTENSION}" MATCHES "pypy" OR "${Python_INTERPRETER_ID}" STREQUAL "PyPy")
+  message(STATUS "Skipping embed test on PyPy")
   add_custom_target(cpptest) # Dummy target on PyPy. Embedding is not supported.
   set(_suppress_unused_variable_warning "${DOWNLOAD_CATCH}")
   return()
diff --git a/tests/test_enum.py b/tests/test_enum.py
index f6b24fc..e9732fa 100644
--- a/tests/test_enum.py
+++ b/tests/test_enum.py
@@ -13,15 +13,24 @@
 
     # name property
     assert m.UnscopedEnum.EOne.name == "EOne"
+    assert m.UnscopedEnum.EOne.value == 1
     assert m.UnscopedEnum.ETwo.name == "ETwo"
-    assert m.EOne.name == "EOne"
-    # name readonly
+    assert m.UnscopedEnum.ETwo.value == 2
+    assert m.EOne is m.UnscopedEnum.EOne
+    # name, value readonly
     with pytest.raises(AttributeError):
         m.UnscopedEnum.EOne.name = ""
-    # name returns a copy
-    foo = m.UnscopedEnum.EOne.name
-    foo = "bar"
+    with pytest.raises(AttributeError):
+        m.UnscopedEnum.EOne.value = 10
+    # name, value returns a copy
+    # TODO: Neither the name nor value tests actually check against aliasing.
+    # Use a mutable type that has reference semantics.
+    nonaliased_name = m.UnscopedEnum.EOne.name
+    nonaliased_name = "bar"  # noqa: F841
     assert m.UnscopedEnum.EOne.name == "EOne"
+    nonaliased_value = m.UnscopedEnum.EOne.value
+    nonaliased_value = 10  # noqa: F841
+    assert m.UnscopedEnum.EOne.value == 1
 
     # __members__ property
     assert m.UnscopedEnum.__members__ == {
@@ -33,8 +42,8 @@
     with pytest.raises(AttributeError):
         m.UnscopedEnum.__members__ = {}
     # __members__ returns a copy
-    foo = m.UnscopedEnum.__members__
-    foo["bar"] = "baz"
+    nonaliased_members = m.UnscopedEnum.__members__
+    nonaliased_members["bar"] = "baz"
     assert m.UnscopedEnum.__members__ == {
         "EOne": m.UnscopedEnum.EOne,
         "ETwo": m.UnscopedEnum.ETwo,
@@ -73,25 +82,25 @@
     assert not (y == "2")
 
     with pytest.raises(TypeError):
-        y < object()
+        y < object()  # noqa: B015
 
     with pytest.raises(TypeError):
-        y <= object()
+        y <= object()  # noqa: B015
 
     with pytest.raises(TypeError):
-        y > object()
+        y > object()  # noqa: B015
 
     with pytest.raises(TypeError):
-        y >= object()
+        y >= object()  # noqa: B015
 
     with pytest.raises(TypeError):
-        y | object()
+        y | object()  # noqa: B015
 
     with pytest.raises(TypeError):
-        y & object()
+        y & object()  # noqa: B015
 
     with pytest.raises(TypeError):
-        y ^ object()
+        y ^ object()  # noqa: B015
 
     assert int(m.UnscopedEnum.ETwo) == 2
     assert str(m.UnscopedEnum(2)) == "UnscopedEnum.ETwo"
@@ -134,13 +143,13 @@
     assert not (z == object())
     # Scoped enums will *NOT* accept >, <, >= and <= int comparisons (Will throw exceptions)
     with pytest.raises(TypeError):
-        z > 3
+        z > 3  # noqa: B015
     with pytest.raises(TypeError):
-        z < 3
+        z < 3  # noqa: B015
     with pytest.raises(TypeError):
-        z >= 3
+        z >= 3  # noqa: B015
     with pytest.raises(TypeError):
-        z <= 3
+        z <= 3  # noqa: B015
 
     # order
     assert m.ScopedEnum.Two < m.ScopedEnum.Three
diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py
index 95eac70..c6cb652 100644
--- a/tests/test_exceptions.py
+++ b/tests/test_exceptions.py
@@ -50,13 +50,24 @@
     assert d["good"] is True
 
 
+def ignore_pytest_unraisable_warning(f):
+    """Decorator: filter out pytest's unraisable-exception warning if pytest has it."""
+    warning_name = "PytestUnraisableExceptionWarning"
+    if not hasattr(pytest, warning_name):  # needs Python >= 3.8 and pytest >= 6
+        return f
+    mark = pytest.mark.filterwarnings("ignore::pytest.{}".format(warning_name))
+    return mark(f)
+
+
+@ignore_pytest_unraisable_warning
 def test_python_alreadyset_in_destructor(monkeypatch, capsys):
     hooked = False
     triggered = [False]  # mutable, so Python 2.7 closure can modify it
 
     if hasattr(sys, "unraisablehook"):  # Python 3.8+
         hooked = True
-        default_hook = sys.unraisablehook
+        # Don't take `sys.unraisablehook`, as that's overwritten by pytest
+        default_hook = sys.__unraisablehook__
 
         def hook(unraisable_hook_args):
             exc_type, exc_value, exc_tb, err_msg, obj = unraisable_hook_args
diff --git a/tests/test_factory_constructors.cpp b/tests/test_factory_constructors.cpp
index f42d1f2..7ff7e7b 100644
--- a/tests/test_factory_constructors.cpp
+++ b/tests/test_factory_constructors.cpp
@@ -183,11 +183,14 @@
     auto c4a = [c](pointer_tag, TF4_tag, int a) { (void) c; return new TestFactory4(a);};
 
     // test_init_factory_basic, test_init_factory_casting
-    py::class_<TestFactory3, std::shared_ptr<TestFactory3>>(m, "TestFactory3")
+    py::class_<TestFactory3, std::shared_ptr<TestFactory3>> pyTestFactory3(m, "TestFactory3");
+    pyTestFactory3
         .def(py::init([](pointer_tag, int v) { return TestFactoryHelper::construct3(v); }))
-        .def(py::init([](shared_ptr_tag) { return TestFactoryHelper::construct3(); }))
-        .def("__init__", [](TestFactory3 &self, std::string v) { new (&self) TestFactory3(v); }) // placement-new ctor
-
+        .def(py::init([](shared_ptr_tag) { return TestFactoryHelper::construct3(); }));
+    ignoreOldStyleInitWarnings([&pyTestFactory3]() {
+        pyTestFactory3.def("__init__", [](TestFactory3 &self, std::string v) { new (&self) TestFactory3(v); }); // placement-new ctor
+    });
+    pyTestFactory3
         // factories returning a derived type:
         .def(py::init(c4a)) // derived ptr
         .def(py::init([](pointer_tag, TF5_tag, int a) { return new TestFactory5(a); }))
@@ -304,24 +307,32 @@
         static void operator delete(void *p) { py::print("noisy delete"); ::operator delete(p); }
 #endif
     };
-    py::class_<NoisyAlloc>(m, "NoisyAlloc")
+
+
+    py::class_<NoisyAlloc> pyNoisyAlloc(m, "NoisyAlloc");
         // Since these overloads have the same number of arguments, the dispatcher will try each of
         // them until the arguments convert.  Thus we can get a pre-allocation here when passing a
         // single non-integer:
-        .def("__init__", [](NoisyAlloc *a, int i) { new (a) NoisyAlloc(i); }) // Regular constructor, runs first, requires preallocation
-        .def(py::init([](double d) { return new NoisyAlloc(d); }))
+    ignoreOldStyleInitWarnings([&pyNoisyAlloc]() {
+        pyNoisyAlloc.def("__init__", [](NoisyAlloc *a, int i) { new (a) NoisyAlloc(i); }); // Regular constructor, runs first, requires preallocation
+    });
 
-        // The two-argument version: first the factory pointer overload.
-        .def(py::init([](int i, int) { return new NoisyAlloc(i); }))
-        // Return-by-value:
-        .def(py::init([](double d, int) { return NoisyAlloc(d); }))
-        // Old-style placement new init; requires preallocation
-        .def("__init__", [](NoisyAlloc &a, double d, double) { new (&a) NoisyAlloc(d); })
-        // Requires deallocation of previous overload preallocated value:
-        .def(py::init([](int i, double) { return new NoisyAlloc(i); }))
-        // Regular again: requires yet another preallocation
-        .def("__init__", [](NoisyAlloc &a, int i, std::string) { new (&a) NoisyAlloc(i); })
-        ;
+    pyNoisyAlloc.def(py::init([](double d) { return new NoisyAlloc(d); }));
+
+    // The two-argument version: first the factory pointer overload.
+    pyNoisyAlloc.def(py::init([](int i, int) { return new NoisyAlloc(i); }));
+    // Return-by-value:
+    pyNoisyAlloc.def(py::init([](double d, int) { return NoisyAlloc(d); }));
+    // Old-style placement new init; requires preallocation
+    ignoreOldStyleInitWarnings([&pyNoisyAlloc]() {
+        pyNoisyAlloc.def("__init__", [](NoisyAlloc &a, double d, double) { new (&a) NoisyAlloc(d); });
+    });
+    // Requires deallocation of previous overload preallocated value:
+    pyNoisyAlloc.def(py::init([](int i, double) { return new NoisyAlloc(i); }));
+    // Regular again: requires yet another preallocation
+    ignoreOldStyleInitWarnings([&pyNoisyAlloc]() {
+        pyNoisyAlloc.def("__init__", [](NoisyAlloc &a, int i, std::string) { new (&a) NoisyAlloc(i); });
+    });
 
 
 
diff --git a/tests/test_iostream.cpp b/tests/test_iostream.cpp
index e916150..1be0655 100644
--- a/tests/test_iostream.cpp
+++ b/tests/test_iostream.cpp
@@ -7,10 +7,15 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
+#if defined(_MSC_VER) && _MSC_VER < 1910  // VS 2015's MSVC
+#  pragma warning(disable: 4702) // unreachable code in system header (xatomic.h(382))
+#endif
 
 #include <pybind11/iostream.h>
 #include "pybind11_tests.h"
+#include <atomic>
 #include <iostream>
+#include <thread>
 
 
 void noisy_function(std::string msg, bool flush) {
@@ -25,6 +30,40 @@
     std::cerr << emsg;
 }
 
+// object to manage C++ thread
+// simply repeatedly write to std::cout until stopped
+// redirect is called at some point to test the safety of scoped_ostream_redirect
+struct TestThread {
+    TestThread() : t_{nullptr}, stop_{false} {
+        auto thread_f = [this] {
+            while (!stop_) {
+                std::cout << "x" << std::flush;
+                std::this_thread::sleep_for(std::chrono::microseconds(50));
+            } };
+        t_ = new std::thread(std::move(thread_f));
+    }
+
+    ~TestThread() {
+        delete t_;  // NOTE: join() must be called first; destroying a joinable std::thread calls std::terminate
+    }
+
+    void stop() { stop_ = true; }
+
+    void join() {
+        py::gil_scoped_release gil_lock;  // release the GIL while blocked in join()
+        t_->join();
+    }
+
+    void sleep() {
+        py::gil_scoped_release gil_lock;  // release the GIL for the duration of the sleep
+        std::this_thread::sleep_for(std::chrono::milliseconds(50));
+    }
+
+    std::thread * t_;
+    std::atomic<bool> stop_;
+};
+
+
 TEST_SUBMODULE(iostream, m) {
 
     add_ostream_redirect(m);
@@ -70,4 +109,10 @@
         std::cout << msg << std::flush;
         std::cerr << emsg << std::flush;
     });
+
+    py::class_<TestThread>(m, "TestThread")
+        .def(py::init<>())
+        .def("stop", &TestThread::stop)
+        .def("join", &TestThread::join)
+        .def("sleep", &TestThread::sleep);
 }
diff --git a/tests/test_iostream.py b/tests/test_iostream.py
index 506db42..6d493be 100644
--- a/tests/test_iostream.py
+++ b/tests/test_iostream.py
@@ -216,3 +216,26 @@
     assert stderr == ""
     assert stream.getvalue() == msg
     assert stream2.getvalue() == msg2
+
+
+def test_threading():
+    """Write to std::cout from 20 C++ threads while an ostream_redirect is active."""
+    with m.ostream_redirect(stdout=True, stderr=False):
+        threads = []
+
+        # start some threads
+        for _j in range(20):
+            threads.append(m.TestThread())
+
+        # give the threads some time to fail
+        threads[0].sleep()
+
+        # stop all the threads
+        for t in threads:
+            t.stop()
+
+        for t in threads:
+            t.join()
+
+        # if a thread segfaults, we don't get here
+        assert True
diff --git a/tests/test_local_bindings.py b/tests/test_local_bindings.py
index d23c467..a38564b 100644
--- a/tests/test_local_bindings.py
+++ b/tests/test_local_bindings.py
@@ -193,7 +193,7 @@
     v2 = [1, 2, 3]
     assert m.load_vector_via_caster(v2) == 6
     with pytest.raises(TypeError) as excinfo:
-        cm.load_vector_via_binding(v2) == 6
+        cm.load_vector_via_binding(v2)
     assert (
         msg(excinfo.value)
         == """
diff --git a/tests/test_methods_and_attributes.cpp b/tests/test_methods_and_attributes.cpp
index 6a2cfb6..f99909b 100644
--- a/tests/test_methods_and_attributes.cpp
+++ b/tests/test_methods_and_attributes.cpp
@@ -322,22 +322,24 @@
         m.def("should_fail", [](int, UnregisteredType) {}, py::arg(), py::arg() = UnregisteredType());
     });
 
+    // [workaround(intel)] ICC 20/21 breaks with py::arg().stuff, using py::arg{}.stuff works.
+
     // test_accepts_none
     py::class_<NoneTester, std::shared_ptr<NoneTester>>(m, "NoneTester")
         .def(py::init<>());
-    m.def("no_none1", &none1, py::arg().none(false));
-    m.def("no_none2", &none2, py::arg().none(false));
-    m.def("no_none3", &none3, py::arg().none(false));
-    m.def("no_none4", &none4, py::arg().none(false));
-    m.def("no_none5", &none5, py::arg().none(false));
+    m.def("no_none1", &none1, py::arg{}.none(false));
+    m.def("no_none2", &none2, py::arg{}.none(false));
+    m.def("no_none3", &none3, py::arg{}.none(false));
+    m.def("no_none4", &none4, py::arg{}.none(false));
+    m.def("no_none5", &none5, py::arg{}.none(false));
     m.def("ok_none1", &none1);
-    m.def("ok_none2", &none2, py::arg().none(true));
+    m.def("ok_none2", &none2, py::arg{}.none(true));
     m.def("ok_none3", &none3);
-    m.def("ok_none4", &none4, py::arg().none(true));
+    m.def("ok_none4", &none4, py::arg{}.none(true));
     m.def("ok_none5", &none5);
 
-    m.def("no_none_kwarg", &none2, py::arg("a").none(false));
-    m.def("no_none_kwarg_kw_only", &none2, py::kw_only(), py::arg("a").none(false));
+    m.def("no_none_kwarg", &none2, "a"_a.none(false));
+    m.def("no_none_kwarg_kw_only", &none2, py::kw_only(), "a"_a.none(false));
 
     // test_str_issue
     // Issue #283: __str__ called on uninitialized instance when constructor arguments invalid
diff --git a/tests/test_modules.py b/tests/test_modules.py
index 5630ccf..3390031 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -75,3 +75,16 @@
     """Registering two things with the same name"""
 
     assert m.duplicate_registration() == []
+
+
+def test_builtin_key_type():
+    """Test that all the keys in the builtin modules have type str.
+
+    Previous versions of pybind11 would add a unicode key in python 2.
+    """
+    if hasattr(__builtins__, "keys"):
+        keys = __builtins__.keys()
+    else:  # this is to make pypy happy since builtins is different there.
+        keys = __builtins__.__dict__.keys()
+
+    assert {type(k) for k in keys} == {str}
diff --git a/tests/test_numpy_array.cpp b/tests/test_numpy_array.cpp
index a84de77..dca7145 100644
--- a/tests/test_numpy_array.cpp
+++ b/tests/test_numpy_array.cpp
@@ -258,9 +258,11 @@
     sm.def("overloaded2", [](py::array_t<std::complex<float>>) { return "float complex"; });
     sm.def("overloaded2", [](py::array_t<float>) { return "float"; });
 
+    // [workaround(intel)] ICC 20/21 breaks with py::arg().stuff, using py::arg{}.stuff works.
+
     // Only accept the exact types:
-    sm.def("overloaded3", [](py::array_t<int>) { return "int"; }, py::arg().noconvert());
-    sm.def("overloaded3", [](py::array_t<double>) { return "double"; }, py::arg().noconvert());
+    sm.def("overloaded3", [](py::array_t<int>) { return "int"; }, py::arg{}.noconvert());
+    sm.def("overloaded3", [](py::array_t<double>) { return "double"; }, py::arg{}.noconvert());
 
     // Make sure we don't do unsafe coercion (e.g. float to int) when not using forcecast, but
     // rather that float gets converted via the safe (conversion to double) overload:
@@ -284,7 +286,7 @@
         for (py::ssize_t i = 0; i < r.shape(0); i++)
             for (py::ssize_t j = 0; j < r.shape(1); j++)
                 r(i, j) += v;
-    }, py::arg().noconvert(), py::arg());
+    }, py::arg{}.noconvert(), py::arg());
 
     sm.def("proxy_init3", [](double start) {
         py::array_t<double, py::array::c_style> a({ 3, 3, 3 });
@@ -338,7 +340,7 @@
         for (py::ssize_t i = 0; i < r.shape(0); i++)
             for (py::ssize_t j = 0; j < r.shape(1); j++)
                 r(i, j) += v;
-    }, py::arg().noconvert(), py::arg());
+    }, py::arg{}.noconvert(), py::arg());
     sm.def("proxy_init3_dyn", [](double start) {
         py::array_t<double, py::array::c_style> a({ 3, 3, 3 });
         auto r = a.mutable_unchecked();
@@ -419,20 +421,20 @@
            py::arg("a"));
     sm.def("accept_double_noconvert",
            [](py::array_t<double, 0>) {},
-           py::arg("a").noconvert());
+           "a"_a.noconvert());
     sm.def("accept_double_forcecast_noconvert",
            [](py::array_t<double, py::array::forcecast>) {},
-           py::arg("a").noconvert());
+           "a"_a.noconvert());
     sm.def("accept_double_c_style_noconvert",
            [](py::array_t<double, py::array::c_style>) {},
-           py::arg("a").noconvert());
+           "a"_a.noconvert());
     sm.def("accept_double_c_style_forcecast_noconvert",
            [](py::array_t<double, py::array::forcecast | py::array::c_style>) {},
-           py::arg("a").noconvert());
+           "a"_a.noconvert());
     sm.def("accept_double_f_style_noconvert",
            [](py::array_t<double, py::array::f_style>) {},
-           py::arg("a").noconvert());
+           "a"_a.noconvert());
     sm.def("accept_double_f_style_forcecast_noconvert",
            [](py::array_t<double, py::array::forcecast | py::array::f_style>) {},
-           py::arg("a").noconvert());
+           "a"_a.noconvert());
 }
diff --git a/tests/test_pickling.cpp b/tests/test_pickling.cpp
index 9dc63bd..1a48595 100644
--- a/tests/test_pickling.cpp
+++ b/tests/test_pickling.cpp
@@ -31,7 +31,8 @@
         using Pickleable::Pickleable;
     };
 
-    py::class_<Pickleable>(m, "Pickleable")
+    py::class_<Pickleable> pyPickleable(m, "Pickleable");
+    pyPickleable
         .def(py::init<std::string>())
         .def("value", &Pickleable::value)
         .def("extra1", &Pickleable::extra1)
@@ -43,17 +44,20 @@
         .def("__getstate__", [](const Pickleable &p) {
             /* Return a tuple that fully encodes the state of the object */
             return py::make_tuple(p.value(), p.extra1(), p.extra2());
-        })
-        .def("__setstate__", [](Pickleable &p, py::tuple t) {
-            if (t.size() != 3)
-                throw std::runtime_error("Invalid state!");
-            /* Invoke the constructor (need to use in-place version) */
-            new (&p) Pickleable(t[0].cast<std::string>());
-
-            /* Assign any additional state */
-            p.setExtra1(t[1].cast<int>());
-            p.setExtra2(t[2].cast<int>());
         });
+    ignoreOldStyleInitWarnings([&pyPickleable]() {
+        pyPickleable
+            .def("__setstate__", [](Pickleable &p, py::tuple t) {
+                if (t.size() != 3)
+                    throw std::runtime_error("Invalid state!");
+                /* Invoke the constructor (need to use in-place version) */
+                new (&p) Pickleable(t[0].cast<std::string>());
+
+                /* Assign any additional state */
+                p.setExtra1(t[1].cast<int>());
+                p.setExtra2(t[2].cast<int>());
+            });
+    });
 
     py::class_<PickleableNew, Pickleable>(m, "PickleableNew")
         .def(py::init<std::string>())
@@ -87,27 +91,31 @@
         using PickleableWithDict::PickleableWithDict;
     };
 
-    py::class_<PickleableWithDict>(m, "PickleableWithDict", py::dynamic_attr())
+    py::class_<PickleableWithDict> pyPickleableWithDict(m, "PickleableWithDict", py::dynamic_attr());
+    pyPickleableWithDict
         .def(py::init<std::string>())
         .def_readwrite("value", &PickleableWithDict::value)
         .def_readwrite("extra", &PickleableWithDict::extra)
         .def("__getstate__", [](py::object self) {
             /* Also include __dict__ in state */
             return py::make_tuple(self.attr("value"), self.attr("extra"), self.attr("__dict__"));
-        })
-        .def("__setstate__", [](py::object self, py::tuple t) {
-            if (t.size() != 3)
-                throw std::runtime_error("Invalid state!");
-            /* Cast and construct */
-            auto& p = self.cast<PickleableWithDict&>();
-            new (&p) PickleableWithDict(t[0].cast<std::string>());
-
-            /* Assign C++ state */
-            p.extra = t[1].cast<int>();
-
-            /* Assign Python state */
-            self.attr("__dict__") = t[2];
         });
+    ignoreOldStyleInitWarnings([&pyPickleableWithDict]() {
+        pyPickleableWithDict
+            .def("__setstate__", [](py::object self, py::tuple t) {
+                if (t.size() != 3)
+                    throw std::runtime_error("Invalid state!");
+                /* Cast and construct */
+                auto& p = self.cast<PickleableWithDict&>();
+                new (&p) PickleableWithDict(t[0].cast<std::string>());
+
+                /* Assign C++ state */
+                p.extra = t[1].cast<int>();
+
+                /* Assign Python state */
+                self.attr("__dict__") = t[2];
+            });
+    });
 
     py::class_<PickleableWithDictNew, PickleableWithDict>(m, "PickleableWithDictNew")
         .def(py::init<std::string>())
diff --git a/tests/test_pytypes.cpp b/tests/test_pytypes.cpp
index 113cf5c..709611d 100644
--- a/tests/test_pytypes.cpp
+++ b/tests/test_pytypes.cpp
@@ -254,15 +254,18 @@
 
     m.def("convert_to_pybind11_str", [](py::object o) { return py::str(o); });
 
-    m.def("nonconverting_constructor", [](std::string type, py::object value) -> py::object {
+    m.def("nonconverting_constructor", [](std::string type, py::object value, bool move) -> py::object {
         if (type == "bytes") {
-            return py::bytes(value);
+            return move ? py::bytes(std::move(value)) : py::bytes(value);
         }
         else if (type == "none") {
-            return py::none(value);
+            return move ? py::none(std::move(value)) : py::none(value);
         }
         else if (type == "ellipsis") {
-            return py::ellipsis(value);
+            return move ? py::ellipsis(std::move(value)) : py::ellipsis(value);
+        }
+        else if (type == "type") {
+            return move ? py::type(std::move(value)) : py::type(value);
         }
         throw std::runtime_error("Invalid type");
     });
diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py
index 9e5c302..b1509a0 100644
--- a/tests/test_pytypes.py
+++ b/tests/test_pytypes.py
@@ -268,14 +268,16 @@
         ("bytes", range(10)),
         ("none", 42),
         ("ellipsis", 42),
+        ("type", 42),
     ]
     for t, v in non_converting_test_cases:
-        with pytest.raises(TypeError) as excinfo:
-            m.nonconverting_constructor(t, v)
-        expected_error = "Object of type '{}' is not an instance of '{}'".format(
-            type(v).__name__, t
-        )
-        assert str(excinfo.value) == expected_error
+        for move in [True, False]:
+            with pytest.raises(TypeError) as excinfo:
+                m.nonconverting_constructor(t, v, move)
+            expected_error = "Object of type '{}' is not an instance of '{}'".format(
+                type(v).__name__, t
+            )
+            assert str(excinfo.value) == expected_error
 
 
 def test_pybind11_str_raw_str():
diff --git a/tests/test_smart_ptr.cpp b/tests/test_smart_ptr.cpp
index 60c2e69..59996ed 100644
--- a/tests/test_smart_ptr.cpp
+++ b/tests/test_smart_ptr.cpp
@@ -8,8 +8,8 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#if defined(_MSC_VER) && _MSC_VER < 1910
-#  pragma warning(disable: 4702) // unreachable code in system header
+#if defined(_MSC_VER) && _MSC_VER < 1910  // VS 2015's MSVC
+#  pragma warning(disable: 4702) // unreachable code in system header (xatomic.h(382))
 #endif
 
 #include "pybind11_tests.h"
@@ -176,33 +176,63 @@
 
     // test_unique_nodelete
     // Object with a private destructor
+    class MyObject4;
+    static std::unordered_set<MyObject4 *> myobject4_instances;
     class MyObject4 {
     public:
-        MyObject4(int value) : value{value} { print_created(this); }
+        MyObject4(int value) : value{value} {
+            print_created(this);
+            myobject4_instances.insert(this);
+        }
         int value;
+
+        static void cleanupAllInstances() {
+            auto tmp = std::move(myobject4_instances);
+            myobject4_instances.clear();
+            for (auto o : tmp)
+                delete o;
+        }
     private:
-        ~MyObject4() { print_destroyed(this); }
+        ~MyObject4() {
+            myobject4_instances.erase(this);
+            print_destroyed(this);
+        }
     };
     py::class_<MyObject4, std::unique_ptr<MyObject4, py::nodelete>>(m, "MyObject4")
         .def(py::init<int>())
-        .def_readwrite("value", &MyObject4::value);
+        .def_readwrite("value", &MyObject4::value)
+        .def_static("cleanup_all_instances", &MyObject4::cleanupAllInstances);
 
     // test_unique_deleter
     // Object with std::unique_ptr<T, D> where D is not matching the base class
     // Object with a protected destructor
+    class MyObject4a;
+    static std::unordered_set<MyObject4a *> myobject4a_instances;
     class MyObject4a {
     public:
         MyObject4a(int i) {
             value = i;
             print_created(this);
+            myobject4a_instances.insert(this);
         };
         int value;
+
+        static void cleanupAllInstances() {
+            auto tmp = std::move(myobject4a_instances);
+            myobject4a_instances.clear();
+            for (auto o : tmp)
+                delete o;
+        }
     protected:
-        virtual ~MyObject4a() { print_destroyed(this); }
+        virtual ~MyObject4a() {
+            myobject4a_instances.erase(this);
+            print_destroyed(this);
+        }
     };
     py::class_<MyObject4a, std::unique_ptr<MyObject4a, py::nodelete>>(m, "MyObject4a")
         .def(py::init<int>())
-        .def_readwrite("value", &MyObject4a::value);
+        .def_readwrite("value", &MyObject4a::value)
+        .def_static("cleanup_all_instances", &MyObject4a::cleanupAllInstances);
 
     // Object derived but with public destructor and no Deleter in default holder
     class MyObject4b : public MyObject4a {
diff --git a/tests/test_smart_ptr.py b/tests/test_smart_ptr.py
index c55bffb..85f61a3 100644
--- a/tests/test_smart_ptr.py
+++ b/tests/test_smart_ptr.py
@@ -125,7 +125,9 @@
     cstats = ConstructorStats.get(m.MyObject4)
     assert cstats.alive() == 1
     del o
-    assert cstats.alive() == 1  # Leak, but that's intentional
+    assert cstats.alive() == 1
+    m.MyObject4.cleanup_all_instances()
+    assert cstats.alive() == 0
 
 
 def test_unique_nodelete4a():
@@ -134,19 +136,25 @@
     cstats = ConstructorStats.get(m.MyObject4a)
     assert cstats.alive() == 1
     del o
-    assert cstats.alive() == 1  # Leak, but that's intentional
+    assert cstats.alive() == 1
+    m.MyObject4a.cleanup_all_instances()
+    assert cstats.alive() == 0
 
 
 def test_unique_deleter():
+    m.MyObject4a(0)
     o = m.MyObject4b(23)
     assert o.value == 23
     cstats4a = ConstructorStats.get(m.MyObject4a)
-    assert cstats4a.alive() == 2  # Two because of previous test
+    assert cstats4a.alive() == 2
     cstats4b = ConstructorStats.get(m.MyObject4b)
     assert cstats4b.alive() == 1
     del o
-    assert cstats4a.alive() == 1  # Should now only be one leftover from previous test
+    assert cstats4a.alive() == 1  # Should now only be one leftover
     assert cstats4b.alive() == 0  # Should be deleted
+    m.MyObject4a.cleanup_all_instances()
+    assert cstats4a.alive() == 0
+    assert cstats4b.alive() == 0
 
 
 def test_large_holder():
diff --git a/tests/test_stl_binders.cpp b/tests/test_stl_binders.cpp
index 1c0df98..c791477 100644
--- a/tests/test_stl_binders.cpp
+++ b/tests/test_stl_binders.cpp
@@ -86,13 +86,13 @@
 
     // test_noncopyable_containers
     py::bind_vector<std::vector<E_nc>>(m, "VectorENC");
-    m.def("get_vnc", &one_to_n<std::vector<E_nc>>, py::return_value_policy::reference);
+    m.def("get_vnc", &one_to_n<std::vector<E_nc>>);
     py::bind_vector<std::deque<E_nc>>(m, "DequeENC");
-    m.def("get_dnc", &one_to_n<std::deque<E_nc>>, py::return_value_policy::reference);
+    m.def("get_dnc", &one_to_n<std::deque<E_nc>>);
     py::bind_map<std::map<int, E_nc>>(m, "MapENC");
-    m.def("get_mnc", &times_ten<std::map<int, E_nc>>, py::return_value_policy::reference);
+    m.def("get_mnc", &times_ten<std::map<int, E_nc>>);
     py::bind_map<std::unordered_map<int, E_nc>>(m, "UmapENC");
-    m.def("get_umnc", &times_ten<std::unordered_map<int, E_nc>>, py::return_value_policy::reference);
+    m.def("get_umnc", &times_ten<std::unordered_map<int, E_nc>>);
     // Issue #1885: binding nested std::map<X, Container<E>> with E non-copyable
     py::bind_map<std::map<int, std::vector<E_nc>>>(m, "MapVecENC");
     m.def("get_nvnc", [](int n)
@@ -102,11 +102,11 @@
                 for (int j = 1; j <= n; j++)
                     (*m)[i].emplace_back(j);
             return m;
-        }, py::return_value_policy::reference);
+        });
     py::bind_map<std::map<int, std::map<int, E_nc>>>(m, "MapMapENC");
-    m.def("get_nmnc", &times_hundred<std::map<int, std::map<int, E_nc>>>, py::return_value_policy::reference);
+    m.def("get_nmnc", &times_hundred<std::map<int, std::map<int, E_nc>>>);
     py::bind_map<std::unordered_map<int, std::unordered_map<int, E_nc>>>(m, "UmapUmapENC");
-    m.def("get_numnc", &times_hundred<std::unordered_map<int, std::unordered_map<int, E_nc>>>, py::return_value_policy::reference);
+    m.def("get_numnc", &times_hundred<std::unordered_map<int, std::unordered_map<int, E_nc>>>);
 
     // test_vector_buffer
     py::bind_vector<std::vector<unsigned char>>(m, "VectorUChar", py::buffer_protocol());
diff --git a/tests/test_virtual_functions.py b/tests/test_virtual_functions.py
index ae19930..f7d3bd1 100644
--- a/tests/test_virtual_functions.py
+++ b/tests/test_virtual_functions.py
@@ -251,8 +251,7 @@
                 == 'Tried to call pure virtual function "Base::dispatch"'
             )
 
-            p = PyClass1()
-            return m.dispatch_issue_go(p)
+            return m.dispatch_issue_go(PyClass1())
 
     b = PyClass2()
     assert m.dispatch_issue_go(b) == "Yay.."
diff --git a/tests/valgrind-numpy-scipy.supp b/tests/valgrind-numpy-scipy.supp
new file mode 100644
index 0000000..60e9f47
--- /dev/null
+++ b/tests/valgrind-numpy-scipy.supp
@@ -0,0 +1,118 @@
+# Valgrind suppression file for NumPy & SciPy errors and leaks in pybind11 tests
+
+{
+   Leaks when importing NumPy
+   Memcheck:Leak
+   fun:malloc
+   fun:_PyMem_RawMalloc
+   fun:PyObject_Malloc
+   fun:_PyObject_GC_Alloc
+   fun:_PyObject_GC_Malloc
+   fun:_PyObject_GC_NewVar
+   fun:tuple_alloc
+   fun:PyTuple_Pack
+   ...
+   fun:__pyx_pymod_exec_*
+}
+
+{
+   Leaks when importing NumPy (bis)
+   Memcheck:Leak
+   fun:malloc
+   fun:_PyMem_RawMalloc
+   fun:PyObject_Malloc
+   fun:_PyObject_New
+   fun:PyCode_NewWithPosOnlyArgs
+   fun:PyCode_New
+   ...
+   fun:__pyx_pymod_exec_*
+}
+
+{
+   Leaks when importing NumPy (tris)
+   Memcheck:Leak
+   fun:malloc
+   fun:_PyMem_RawMalloc
+   fun:PyObject_Malloc
+   fun:_PyObject_GC_Alloc
+   fun:_PyObject_GC_Malloc
+   fun:_PyObject_GC_NewVar
+   fun:tuple_alloc
+   fun:_PyTuple_FromArray
+   fun:_PyObject_MakeTpCall
+   fun:_PyObject_VectorcallTstate
+   fun:PyObject_Vectorcall
+   fun:call_function
+   fun:_PyEval_EvalFrameDefault
+   fun:_PyEval_EvalFrame
+   fun:function_code_fastcall
+   fun:_PyFunction_Vectorcall
+}
+
+{
+   Leaks when importing NumPy (quater)
+   Memcheck:Leak
+   fun:malloc
+   fun:_PyMem_RawMalloc
+   fun:PyObject_Malloc
+   fun:_PyObject_GC_Alloc
+   fun:_PyObject_GC_Malloc
+   fun:_PyObject_GC_NewVar
+   fun:tuple_alloc
+   fun:PyTuple_New
+   fun:r_object
+   fun:r_object
+   fun:r_object
+   fun:r_object
+}
+
+{
+   Leaks when importing NumPy (quinquies)
+   Memcheck:Leak
+   fun:malloc
+   fun:_PyMem_RawMalloc
+   fun:PyObject_Malloc
+   fun:_PyObject_GC_Alloc
+   fun:_PyObject_GC_Malloc
+   fun:_PyObject_GC_NewVar
+   fun:tuple_alloc
+   fun:PyTuple_New
+   fun:dictiter_iternextitem
+   fun:list_extend
+   fun:_PyList_Extend
+   fun:PySequence_List
+}
+
+{
+   Leak when importing scipy.fft
+   Memcheck:Leak
+   fun:_Znwm
+   fun:PyInit_pypocketfft
+   fun:_PyImport_LoadDynamicModuleWithSpec
+   fun:_imp_create_dynamic_impl.constprop.3
+   fun:_imp_create_dynamic
+   fun:cfunction_vectorcall_FASTCALL
+   fun:PyVectorcall_Call
+   fun:_PyObject_Call
+   fun:PyObject_Call
+   fun:do_call_core
+   fun:_PyEval_EvalFrameDefault
+   fun:_PyEval_EvalFrame
+   fun:_PyEval_EvalCode
+}
+
+{
+   NumPy leaks when spawning a subprocess
+   Memcheck:Leak
+   fun:malloc
+   ...
+   fun:_buffer_get_info
+   fun:array_getbuffer
+   fun:PyObject_GetBuffer
+   fun:__Pyx__GetBufferAndValidate*
+   fun:__pyx_f_5numpy_6random_13bit_generator_12SeedSequence_mix_entropy
+   fun:__pyx_pw_5numpy_6random_13bit_generator_12SeedSequence_1__init__
+   fun:type_call
+   fun:__Pyx__PyObject_CallOneArg
+   fun:__pyx_pw_5numpy_6random_13bit_generator_12BitGenerator_1__init__
+}
diff --git a/tests/valgrind-python.supp b/tests/valgrind-python.supp
new file mode 100644
index 0000000..1dd04fa
--- /dev/null
+++ b/tests/valgrind-python.supp
@@ -0,0 +1,135 @@
+# Valgrind suppression file for CPython errors and leaks in pybind11 tests
+
+# Taken verbatim from https://github.com/python/cpython/blob/3.9/Misc/valgrind-python.supp#L266-L272
+{
+   Uninitialised byte(s) false alarm, see bpo-35561
+   Memcheck:Param
+   epoll_ctl(event)
+   fun:epoll_ctl
+   fun:pyepoll_internal_ctl
+}
+
+{
+   Python leaks when spawning a subprocess
+   Memcheck:Leak
+   fun:malloc
+   fun:_PyMem_RawMalloc
+   fun:PyMem_RawMalloc
+   fun:PyThread_allocate_lock
+   fun:_PyEval_InitState
+   fun:PyInterpreterState_New
+   ...
+   fun:pyinit_core*
+   fun:Py_InitializeFromConfig
+   fun:pymain_init
+   fun:pymain_main
+}
+
+{
+   Python leaks when spawning a subprocess
+   Memcheck:Leak
+   fun:malloc
+   fun:_PyMem_RawMalloc
+   fun:_PyMem_DebugRawAlloc
+   fun:_PyMem_DebugRawMalloc
+   fun:PyMem_RawMalloc
+   fun:PyThread_allocate_lock
+   fun:_PyRuntimeState_Init_impl
+   fun:_PyRuntimeState_Init
+   fun:_PyRuntime_Initialize
+   fun:pymain_init
+   fun:pymain_main
+   fun:Py_BytesMain
+}
+
+{
+   Python leaks when spawning a subprocess
+   Memcheck:Leak
+   fun:malloc
+   fun:_PyMem_RawMalloc
+   fun:PyMem_RawMalloc
+   fun:PyThread_allocate_lock
+   fun:_PyImport_AcquireLock
+   fun:_imp_acquire_lock_impl*
+   fun:_imp_acquire_lock
+   fun:cfunction_vectorcall_NOARGS
+   fun:_PyObject_VectorcallTstate
+   fun:PyObject_Vectorcall
+   fun:call_function
+   fun:_PyEval_EvalFrameDefault
+   fun:_PyEval_EvalFrame
+   fun:function_code_fastcall
+}
+
+{
+   Python leaks when spawning a subprocess
+   Memcheck:Leak
+   fun:malloc
+   fun:_PyMem_RawMalloc
+   fun:PyMem_RawMalloc
+   fun:PyThread_allocate_lock
+   fun:newlockobject
+   ...
+   fun:cfunction_vectorcall_NOARGS
+   fun:_PyObject_VectorcallTstate
+   fun:PyObject_Vectorcall
+   fun:call_function
+   fun:_PyEval_EvalFrameDefault
+   fun:_PyEval_EvalFrame
+   fun:function_code_fastcall
+   fun:_PyFunction_Vectorcall
+}
+
+{
+   Python leaks when spawning a subprocess
+   Memcheck:Leak
+   fun:malloc
+   fun:_PyMem_RawMalloc
+   fun:PyMem_RawMalloc
+   fun:PyThread_allocate_lock
+   fun:rlock_new
+   fun:type_call
+   fun:_PyObject_Call
+   fun:PyObject_Call
+   fun:do_call_core
+   fun:_PyEval_EvalFrameDefault
+   fun:_PyEval_EvalFrame
+   fun:_PyEval_EvalCode
+   fun:_PyFunction_Vectorcall
+}
+
+# Not really CPython-specific, see link
+{
+   dlopen leak (https://stackoverflow.com/questions/1542457/memory-leak-reported-by-valgrind-in-dlopen)
+   Memcheck:Leak
+   fun:malloc
+   ...
+   fun:dl_open_worker
+   fun:_dl_catch_exception
+   fun:_dl_open
+   fun:dlopen_doit
+   fun:_dl_catch_exception
+   fun:_dl_catch_error
+   fun:_dlerror_run
+   fun:dlopen@@GLIBC_2.2.5
+   fun:_PyImport_FindSharedFuncptr
+   fun:_PyImport_LoadDynamicModuleWithSpec
+}
+
+# Not really CPython-specific, see link
+{
+   dlopen leak (https://stackoverflow.com/questions/1542457/memory-leak-reported-by-valgrind-in-dlopen)
+   Memcheck:Leak
+   fun:malloc
+   ...
+   fun:dl_open_worker
+   fun:_dl_catch_exception
+   fun:_dl_open
+   fun:dlopen_doit
+   fun:_dl_catch_exception
+   fun:_dl_catch_error
+   fun:_dlerror_run
+   fun:dlopen@@GLIBC_2.2.5
+   fun:_PyImport_FindSharedFuncptr
+   fun:_PyImport_LoadDynamicModuleWithSpec
+}
diff --git a/tools/FindEigen3.cmake b/tools/FindEigen3.cmake
index 98ab43d..83625d9 100644
--- a/tools/FindEigen3.cmake
+++ b/tools/FindEigen3.cmake
@@ -64,6 +64,9 @@
   set(EIGEN3_FOUND ${EIGEN3_VERSION_OK})
 
 else(EIGEN3_INCLUDE_DIR)
+  if(NOT DEFINED KDE4_INCLUDE_DIR)
+    set(KDE4_INCLUDE_DIR "")
+  endif()
 
   find_path(
     EIGEN3_INCLUDE_DIR
diff --git a/tools/FindPythonLibsNew.cmake b/tools/FindPythonLibsNew.cmake
index b712591..3605aeb 100644
--- a/tools/FindPythonLibsNew.cmake
+++ b/tools/FindPythonLibsNew.cmake
@@ -57,6 +57,8 @@
 
 if(PythonLibsNew_FIND_QUIETLY)
   set(_pythonlibs_quiet QUIET)
+else()
+  set(_pythonlibs_quiet "")
 endif()
 
 if(PythonLibsNew_FIND_REQUIRED)
diff --git a/tools/make_changelog.py b/tools/make_changelog.py
new file mode 100755
index 0000000..609ce2f
--- /dev/null
+++ b/tools/make_changelog.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import re
+
+import ghapi.all
+
+from rich import print
+from rich.syntax import Syntax
+
+
+ENTRY = re.compile(
+    r"""
+    Suggested \s changelog \s entry:
+    .*
+    ```rst
+    \s*
+    (.*?)
+    \s*
+    ```
+""",
+    re.DOTALL | re.VERBOSE,
+)
+
+print()
+
+
+api = ghapi.all.GhApi(owner="pybind", repo="pybind11")
+
+issues = api.issues.list_for_repo(labels="needs changelog", state="closed")
+missing = []
+
+for issue in issues:
+    changelog = ENTRY.findall(issue.body or "")  # the API may return a null body
+    if changelog:
+        (msg,) = changelog
+        if not msg.startswith("* "):
+            msg = "* " + msg
+        if not msg.endswith("."):
+            msg += "."
+
+        msg += f"\n  `#{issue.number} <{issue.html_url}>`_"
+
+        print(Syntax(msg, "rst", theme="ansi_light"))
+        print()
+
+    else:
+        missing.append(issue)
+
+if missing:
+    print()
+    print("[blue]" + "-" * 30)
+    print()
+
+    for issue in missing:
+        print(f"[red bold]Missing:[/red bold][red] {issue.title}")
+        print(f"[red]  {issue.html_url}\n")
+
+    print("[bold]Template:\n")
+    msg = "## Suggested changelog entry:\n\n```rst\n\n```"
+    print(Syntax(msg, "md", theme="ansi_light"))
+
+print()
diff --git a/tools/pybind11Config.cmake.in b/tools/pybind11Config.cmake.in
index 9808f3d..9921aeb 100644
--- a/tools/pybind11Config.cmake.in
+++ b/tools/pybind11Config.cmake.in
@@ -87,7 +87,7 @@
   target_link_libraries(MyModule2 pybind11::headers)
   set_target_properties(MyModule2 PROPERTIES
                                   INTERPROCEDURAL_OPTIMIZATION ON
-                                  CXX__VISIBILITY_PRESET ON
+                                  CXX_VISIBILITY_PRESET hidden
                                   VISIBLITY_INLINES_HIDDEN ON)
 
 If you build targets yourself, you may be interested in stripping the output
diff --git a/tools/pybind11NewTools.cmake b/tools/pybind11NewTools.cmake
index 357cc61..18da8be 100644
--- a/tools/pybind11NewTools.cmake
+++ b/tools/pybind11NewTools.cmake
@@ -12,6 +12,8 @@
 
 if(pybind11_FIND_QUIETLY)
   set(_pybind11_quiet QUIET)
+else()
+  set(_pybind11_quiet "")
 endif()
 
 if(CMAKE_VERSION VERSION_LESS 3.12)
@@ -127,10 +129,20 @@
 # Check on every access - since Python2 and Python3 could have been used - do nothing in that case.
 
 if(DEFINED ${_Python}_INCLUDE_DIRS)
+  # Only add Python for build - must be added during the import for config
+  # since it has to be re-discovered.
+  #
+  # This needs to be a target to be included after the local pybind11
+  # directory, just in case there is an installed pybind11 sitting
+  # next to Python's includes. It also ensures Python is a SYSTEM library.
+  add_library(pybind11::python_headers INTERFACE IMPORTED)
+  set_property(
+    TARGET pybind11::python_headers PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+                                             "$<BUILD_INTERFACE:${${_Python}_INCLUDE_DIRS}>")
   set_property(
     TARGET pybind11::pybind11
     APPEND
-    PROPERTY INTERFACE_INCLUDE_DIRECTORIES $<BUILD_INTERFACE:${${_Python}_INCLUDE_DIRS}>)
+    PROPERTY INTERFACE_LINK_LIBRARIES pybind11::python_headers)
   set(pybind11_INCLUDE_DIRS
       "${pybind11_INCLUDE_DIR}" "${${_Python}_INCLUDE_DIRS}"
       CACHE INTERNAL "Directories where pybind11 and possibly Python headers are located")
@@ -144,11 +156,11 @@
 endif()
 
 # In CMake 3.18+, you can find these separately, so include an if
-if(TARGET ${_Python}::${_Python})
+if(TARGET ${_Python}::Python)
   set_property(
     TARGET pybind11::embed
     APPEND
-    PROPERTY INTERFACE_LINK_LIBRARIES ${_Python}::${_Python})
+    PROPERTY INTERFACE_LINK_LIBRARIES ${_Python}::Python)
 endif()
 
 # CMake 3.15+ has this
@@ -170,27 +182,27 @@
   cmake_parse_arguments(PARSE_ARGV 1 ARG
                         "STATIC;SHARED;MODULE;THIN_LTO;OPT_SIZE;NO_EXTRAS;WITHOUT_SOABI" "" "")
 
-  if(ARG_ADD_LIBRARY_STATIC)
-    set(type STATIC)
-  elseif(ARG_ADD_LIBRARY_SHARED)
-    set(type SHARED)
+  if(ARG_STATIC)
+    set(lib_type STATIC)
+  elseif(ARG_SHARED)
+    set(lib_type SHARED)
   else()
-    set(type MODULE)
+    set(lib_type MODULE)
   endif()
 
   if("${_Python}" STREQUAL "Python")
-    python_add_library(${target_name} ${type} ${ARG_UNPARSED_ARGUMENTS})
+    python_add_library(${target_name} ${lib_type} ${ARG_UNPARSED_ARGUMENTS})
   elseif("${_Python}" STREQUAL "Python3")
-    python3_add_library(${target_name} ${type} ${ARG_UNPARSED_ARGUMENTS})
+    python3_add_library(${target_name} ${lib_type} ${ARG_UNPARSED_ARGUMENTS})
   elseif("${_Python}" STREQUAL "Python2")
-    python2_add_library(${target_name} ${type} ${ARG_UNPARSED_ARGUMENTS})
+    python2_add_library(${target_name} ${lib_type} ${ARG_UNPARSED_ARGUMENTS})
   else()
     message(FATAL_ERROR "Cannot detect FindPython version: ${_Python}")
   endif()
 
   target_link_libraries(${target_name} PRIVATE pybind11::headers)
 
-  if(type STREQUAL "MODULE")
+  if(lib_type STREQUAL "MODULE")
     target_link_libraries(${target_name} PRIVATE pybind11::module)
   else()
     target_link_libraries(${target_name} PRIVATE pybind11::embed)
@@ -204,12 +216,21 @@
     target_link_libraries(${target_name} PRIVATE pybind11::python2_no_register)
   endif()
 
-  set_target_properties(${target_name} PROPERTIES CXX_VISIBILITY_PRESET "hidden"
-                                                  CUDA_VISIBILITY_PRESET "hidden")
+  # -fvisibility=hidden is required to allow multiple modules compiled against
+  # different pybind versions to work properly, and for some features (e.g.
+  # py::module_local).  We force it on everything inside the `pybind11`
+  # namespace; also turning it on for a pybind module compilation here avoids
+  # potential warnings or issues from having mixed hidden/non-hidden types.
+  if(NOT DEFINED CMAKE_CXX_VISIBILITY_PRESET)
+    set_target_properties(${target_name} PROPERTIES CXX_VISIBILITY_PRESET "hidden")
+  endif()
+
+  if(NOT DEFINED CMAKE_CUDA_VISIBILITY_PRESET)
+    set_target_properties(${target_name} PROPERTIES CUDA_VISIBILITY_PRESET "hidden")
+  endif()
 
   # If we don't pass a WITH_SOABI or WITHOUT_SOABI, use our own default handling of extensions
-  if("${type}" STREQUAL "MODULE" AND (NOT ARG_WITHOUT_SOABI OR NOT "WITH_SOABI" IN_LIST
-                                                               ARG_UNPARSED_ARGUMENTS))
+  if(NOT ARG_WITHOUT_SOABI AND NOT "WITH_SOABI" IN_LIST ARG_UNPARSED_ARGUMENTS)
     pybind11_extension(${target_name})
   endif()
 
diff --git a/tools/pybind11Tools.cmake b/tools/pybind11Tools.cmake
index 23cff98..3231353 100644
--- a/tools/pybind11Tools.cmake
+++ b/tools/pybind11Tools.cmake
@@ -10,6 +10,8 @@
 
 if(pybind11_FIND_QUIETLY)
   set(_pybind11_quiet QUIET)
+else()
+  set(_pybind11_quiet "")
 endif()
 
 # If this is the first run, PYTHON_VERSION can stand in for PYBIND11_PYTHON_VERSION
@@ -22,16 +24,21 @@
       CACHE STRING "Python version to use for compiling modules")
   unset(PYTHON_VERSION)
   unset(PYTHON_VERSION CACHE)
-else()
-  # If this is set as a normal variable, promote it, otherwise, make an empty cache variable.
+elseif(DEFINED PYBIND11_PYTHON_VERSION)
+  # If this is set as a normal variable, promote it
   set(PYBIND11_PYTHON_VERSION
       "${PYBIND11_PYTHON_VERSION}"
       CACHE STRING "Python version to use for compiling modules")
+else()
+  # Make an empty cache variable.
+  set(PYBIND11_PYTHON_VERSION
+      ""
+      CACHE STRING "Python version to use for compiling modules")
 endif()
 
 # A user can set versions manually too
 set(Python_ADDITIONAL_VERSIONS
-    "3.9;3.8;3.7;3.6;3.5;3.4"
+    "3.10;3.9;3.8;3.7;3.6;3.5;3.4"
     CACHE INTERNAL "")
 
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")
@@ -81,11 +88,19 @@
   endif()
 endif()
 
-# Only add Python for build - must be added during the import for config since it has to be re-discovered.
+# Only add Python for build - must be added during the import for config since
+# it has to be re-discovered.
+#
+# This needs to be a target so that it is included after the local pybind11
+# directory; in case there are multiple versions of pybind11, we want the
+# one we expect.
+add_library(pybind11::python_headers INTERFACE IMPORTED)
+set_property(TARGET pybind11::python_headers PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+                                                      "$<BUILD_INTERFACE:${PYTHON_INCLUDE_DIRS}>")
 set_property(
   TARGET pybind11::pybind11
   APPEND
-  PROPERTY INTERFACE_INCLUDE_DIRECTORIES $<BUILD_INTERFACE:${PYTHON_INCLUDE_DIRS}>)
+  PROPERTY INTERFACE_LINK_LIBRARIES pybind11::python_headers)
 
 set(pybind11_INCLUDE_DIRS
     "${pybind11_INCLUDE_DIR}" "${PYTHON_INCLUDE_DIRS}"
@@ -166,8 +181,13 @@
   # py::module_local).  We force it on everything inside the `pybind11`
   # namespace; also turning it on for a pybind module compilation here avoids
   # potential warnings or issues from having mixed hidden/non-hidden types.
-  set_target_properties(${target_name} PROPERTIES CXX_VISIBILITY_PRESET "hidden"
-                                                  CUDA_VISIBILITY_PRESET "hidden")
+  if(NOT DEFINED CMAKE_CXX_VISIBILITY_PRESET)
+    set_target_properties(${target_name} PROPERTIES CXX_VISIBILITY_PRESET "hidden")
+  endif()
+
+  if(NOT DEFINED CMAKE_CUDA_VISIBILITY_PRESET)
+    set_target_properties(${target_name} PROPERTIES CUDA_VISIBILITY_PRESET "hidden")
+  endif()
 
   if(ARG_NO_EXTRAS)
     return()
