Merge "Upgrade pybind11 to 'v2.6.1'"
diff --git a/.appveyor.yml b/.appveyor.yml
index 8fbb726..149a8a3 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -1,64 +1,32 @@
 version: 1.0.{build}
 image:
-- Visual Studio 2017
 - Visual Studio 2015
 test: off
 skip_branch_with_pr: true
 build:
   parallel: true
 platform:
-- x64
 - x86
 environment:
   matrix:
   - PYTHON: 36
-    CPP: 14
     CONFIG: Debug
   - PYTHON: 27
-    CPP: 14
     CONFIG: Debug
-  - CONDA: 36
-    CPP: latest
-    CONFIG: Release
-matrix:
-  exclude:
-    - image: Visual Studio 2015
-      platform: x86
-    - image: Visual Studio 2015
-      CPP: latest
-    - image: Visual Studio 2017
-      CPP: latest
-      platform: x86
 install:
 - ps: |
-    if ($env:PLATFORM -eq "x64") { $env:CMAKE_ARCH = "x64" }
-    if ($env:APPVEYOR_JOB_NAME -like "*Visual Studio 2017*") {
-      $env:CMAKE_GENERATOR = "Visual Studio 15 2017"
-      $env:CMAKE_INCLUDE_PATH = "C:\Libraries\boost_1_64_0"
-      $env:CXXFLAGS = "-permissive-"
-    } else {
-      $env:CMAKE_GENERATOR = "Visual Studio 14 2015"
-    }
-    if ($env:PYTHON) {
-      if ($env:PLATFORM -eq "x64") { $env:PYTHON = "$env:PYTHON-x64" }
-      $env:PATH = "C:\Python$env:PYTHON\;C:\Python$env:PYTHON\Scripts\;$env:PATH"
-      python -W ignore -m pip install --upgrade pip wheel
-      python -W ignore -m pip install pytest numpy --no-warn-script-location
-    } elseif ($env:CONDA) {
-      if ($env:CONDA -eq "27") { $env:CONDA = "" }
-      if ($env:PLATFORM -eq "x64") { $env:CONDA = "$env:CONDA-x64" }
-      $env:PATH = "C:\Miniconda$env:CONDA\;C:\Miniconda$env:CONDA\Scripts\;$env:PATH"
-      $env:PYTHONHOME = "C:\Miniconda$env:CONDA"
-      conda --version
-      conda install -y -q pytest numpy scipy
-    }
+    $env:CMAKE_GENERATOR = "Visual Studio 14 2015"
+    if ($env:PLATFORM -eq "x64") { $env:PYTHON = "$env:PYTHON-x64" }
+    $env:PATH = "C:\Python$env:PYTHON\;C:\Python$env:PYTHON\Scripts\;$env:PATH"
+    python -W ignore -m pip install --upgrade pip wheel
+    python -W ignore -m pip install pytest numpy --no-warn-script-location
 - ps: |
-    Start-FileDownload 'http://bitbucket.org/eigen/eigen/get/3.3.3.zip'
-    7z x 3.3.3.zip -y > $null
-    $env:CMAKE_INCLUDE_PATH = "eigen-eigen-67e894c6cd8f;$env:CMAKE_INCLUDE_PATH"
+    Start-FileDownload 'https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.zip'
+    7z x eigen-3.3.7.zip -y > $null
+    $env:CMAKE_INCLUDE_PATH = "eigen-3.3.7;$env:CMAKE_INCLUDE_PATH"
 build_script:
 - cmake -G "%CMAKE_GENERATOR%" -A "%CMAKE_ARCH%"
-    -DPYBIND11_CPP_STANDARD=/std:c++%CPP%
+    -DCMAKE_CXX_STANDARD=14
     -DPYBIND11_WERROR=ON
     -DDOWNLOAD_CATCH=ON
     -DCMAKE_SUPPRESS_REGENERATION=1
@@ -66,5 +34,4 @@
 - set MSBuildLogger="C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
 - cmake --build . --config %CONFIG% --target pytest -- /m /v:m /logger:%MSBuildLogger%
 - cmake --build . --config %CONFIG% --target cpptest -- /m /v:m /logger:%MSBuildLogger%
-- if "%CPP%"=="latest" (cmake --build . --config %CONFIG% --target test_cmake_build -- /m /v:m /logger:%MSBuildLogger%)
 on_failure: if exist "tests\test_cmake_build" type tests\test_cmake_build\*.log*
diff --git a/.clang-tidy b/.clang-tidy
new file mode 100644
index 0000000..e29d929
--- /dev/null
+++ b/.clang-tidy
@@ -0,0 +1,13 @@
+FormatStyle: file
+
+Checks: '
+llvm-namespace-comment,
+modernize-use-override,
+readability-container-size-empty,
+modernize-use-using,
+modernize-use-equals-default,
+modernize-use-auto,
+modernize-use-emplace,
+'
+
+HeaderFilterRegex: 'pybind11/.*h'
diff --git a/.cmake-format.yaml b/.cmake-format.yaml
new file mode 100644
index 0000000..a2a69f3
--- /dev/null
+++ b/.cmake-format.yaml
@@ -0,0 +1,73 @@
+parse:
+  additional_commands:
+    pybind11_add_module:
+      flags:
+        - THIN_LTO
+        - MODULE
+        - SHARED
+        - NO_EXTRAS
+        - EXCLUDE_FROM_ALL
+        - SYSTEM
+
+format:
+  line_width: 99
+  tab_size: 2
+
+  # If an argument group contains more than this many sub-groups
+  # (parg or kwarg groups) then force it to a vertical layout.
+  max_subgroups_hwrap: 2
+
+  # If a positional argument group contains more than this many
+  # arguments, then force it to a vertical layout.
+  max_pargs_hwrap: 6
+
+  # If a cmdline positional group consumes more than this many
+  # lines without nesting, then invalidate the layout (and nest)
+  max_rows_cmdline: 2
+  separate_ctrl_name_with_space: false
+  separate_fn_name_with_space: false
+  dangle_parens: false
+
+  # If the trailing parenthesis must be 'dangled' on its own
+  # line, then align it to this reference: `prefix`: the start
+  # of the statement, `prefix-indent`: the start of the
+  # statement, plus one indentation level, `child`: align to
+  # the column of the arguments
+  dangle_align: prefix
+  # If the statement spelling length (including space and
+  # parenthesis) is smaller than this amount, then force reject
+  # nested layouts.
+  min_prefix_chars: 4
+
+  # If the statement spelling length (including space and
+  # parenthesis) is larger than the tab width by more than this
+  # amount, then force reject un-nested layouts.
+  max_prefix_chars: 10
+
+  # If a candidate layout is wrapped horizontally but it exceeds
+  # this many lines, then reject the layout.
+  max_lines_hwrap: 2
+
+  line_ending: unix
+
+  # Format command names consistently as 'lower' or 'upper' case
+  command_case: canonical
+
+  # Format keywords consistently as 'lower' or 'upper' case
+  # unchanged is valid too
+  keyword_case: 'upper'
+
+  # A list of command names which should always be wrapped
+  always_wrap: []
+
+  # If true, the argument lists which are known to be sortable
+  # will be sorted lexicographically
+  enable_sort: true
+
+  # If true, the parsers may infer whether or not an argument
+  # list is sortable (without annotation).
+  autosort: false
+
+# Causes a few issues - can be solved later, possibly.
+markup:
+  enable_markup: false
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
new file mode 100644
index 0000000..4ced21b
--- /dev/null
+++ b/.github/CONTRIBUTING.md
@@ -0,0 +1,319 @@
+Thank you for your interest in this project! Please refer to the following
+sections on how to contribute code and bug reports.
+
+## Reporting bugs
+
+Before submitting a question or bug report, please take a moment of your time
+and ensure that your issue isn't already discussed in the project documentation
+provided at [pybind11.readthedocs.org][] or in the [issue tracker][]. You can
+also check [gitter][] to see if it came up before.
+
+Assuming that you have identified a previously unknown problem or an important
+question, it's essential that you submit a self-contained and minimal piece of
+code that reproduces the problem. In other words: no external dependencies,
+isolate the function(s) that cause breakage, submit matched and complete C++
+and Python snippets that can be easily compiled and run in isolation; or
+ideally make a small PR with a failing test case that can be used as a starting
+point.
+
+## Pull requests
+
+Contributions are submitted, reviewed, and accepted using GitHub pull requests.
+Please refer to [this article][using pull requests] for details and adhere to
+the following rules to make the process as smooth as possible:
+
+* Make a new branch for every feature you're working on.
+* Make small and clean pull requests that are easy to review but make sure they
+  do add value by themselves.
+* Add tests for any new functionality and run the test suite (`cmake --build
+  build --target pytest`) to ensure that no existing features break.
+* Please run [`pre-commit`][pre-commit] to check your code matches the
+  project style. (Note that `gawk` is required.) Use `pre-commit run
+  --all-files` before committing (or use installed-mode, check pre-commit docs)
+  to verify your code passes before pushing to save time.
+* This project has a strong focus on providing general solutions using a
+  minimal amount of code, thus small pull requests are greatly preferred.
+
+### Licensing of contributions
+
+pybind11 is provided under a BSD-style license that can be found in the
+``LICENSE`` file. By using, distributing, or contributing to this project, you
+agree to the terms and conditions of this license.
+
+You are under no obligation whatsoever to provide any bug fixes, patches, or
+upgrades to the features, functionality or performance of the source code
+("Enhancements") to anyone; however, if you choose to make your Enhancements
+available either publicly, or directly to the author of this software, without
+imposing a separate written license agreement for such Enhancements, then you
+hereby grant the following license: a non-exclusive, royalty-free perpetual
+license to install, use, modify, prepare derivative works, incorporate into
+other computer software, distribute, and sublicense such enhancements or
+derivative works thereof, in binary and source code form.
+
+
+## Development of pybind11
+
+To set up an ideal development environment, run the following commands on a
+system with CMake 3.14+:
+
+```bash
+python3 -m venv venv
+source venv/bin/activate
+pip install -r tests/requirements.txt
+cmake -S . -B build -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON
+cmake --build build -j4
+```
+
+Tips:
+
+* You can use `virtualenv` (from PyPI) instead of `venv` (which is Python 3
+  only).
+* You can select any name for your environment folder; if it contains "env" it
+  will be ignored by git.
+* If you don’t have CMake 3.14+, just add “cmake” to the pip install command.
+* You can use `-DPYBIND11_FINDPYTHON=ON` to use FindPython on CMake 3.12+
+* In classic mode, you may need to set `-DPYTHON_EXECUTABLE=/path/to/python`.
+  FindPython uses `-DPython_ROOT_DIR=/path/to` or
+  `-DPython_EXECUTABLE=/path/to/python`.
+
+### Configuration options
+
+In CMake, configuration options are given with “-D”. Options are stored in the
+build directory, in the `CMakeCache.txt` file, so they are remembered for each
+build directory. Two selections are special - the generator, given with `-G`,
+and the compiler, which is selected based on environment variables `CXX` and
+similar, or `-DCMAKE_CXX_COMPILER=`. Unlike the others, these cannot be changed
+after the initial run.
+
+The valid options are:
+
+* `-DCMAKE_BUILD_TYPE`: Release, Debug, MinSizeRel, RelWithDebInfo
+* `-DPYBIND11_FINDPYTHON=ON`: Use CMake 3.12+’s FindPython instead of the
+  classic, deprecated, custom FindPythonLibs
+* `-DPYBIND11_NOPYTHON=ON`: Disable all Python searching (disables tests)
+* `-DBUILD_TESTING=ON`: Enable the tests
+* `-DDOWNLOAD_CATCH=ON`: Download catch to build the C++ tests
+* `-DDOWNLOAD_EIGEN=ON`: Download Eigen for the NumPy tests
+* `-DPYBIND11_INSTALL=ON/OFF`: Enable the install target (on by default for the
+  master project)
+* `-DUSE_PYTHON_INSTALL_DIR=ON`: Try to install into the python dir
+
+
+<details><summary>A few standard CMake tricks: (click to expand)</summary><p>
+
+* Use `cmake --build build -v` to see the commands used to build the files.
+* Use `cmake build -LH` to list the CMake options with help.
+* Use `ccmake` if available to see a curses (terminal) gui, or `cmake-gui` for
+  a completely graphical interface (not present in the PyPI package).
+* Use `cmake --build build -j12` to build with 12 cores (for example).
+* Use `-G` and the name of a generator to use something different. `cmake
+  --help` lists the generators available.
+      - On Unix, setting `CMAKE_GENERATOR=Ninja` in your environment will give
+        you automatic multithreading on all your CMake projects!
+* Open the `CMakeLists.txt` with QtCreator to generate for that IDE.
+* You can use `-DCMAKE_EXPORT_COMPILE_COMMANDS=ON` to generate the `.json` file
+  that some tools expect.
+
+</p></details>
+
+
+To run the tests, you can "build" the check target:
+
+```bash
+cmake --build build --target check
+```
+
+`--target` can be spelled `-t` in CMake 3.15+. You can also run individual
+tests with these targets:
+
+* `pytest`: Python tests only
+* `cpptest`: C++ tests only
+* `test_cmake_build`: Install / subdirectory tests
+
+If you want to build just a subset of tests, use
+`-DPYBIND11_TEST_OVERRIDE="test_callbacks.cpp;test_pickling.cpp"`. If this is
+empty, all tests will be built.
+
+### Formatting
+
+All formatting is handled by pre-commit.
+
+Install with brew (macOS) or pip (any OS):
+
+```bash
+# Any OS
+python3 -m pip install pre-commit
+
+# OR macOS with homebrew:
+brew install pre-commit
+```
+
+Then, you can run it on the items you've added to your staging area, or all
+files:
+
+```bash
+pre-commit run
+# OR
+pre-commit run --all-files
+```
+
+And, if you want to always use it, you can install it as a git hook (hence the
+name, pre-commit):
+
+```bash
+pre-commit install
+```
+
+### Clang-Tidy
+
+To run Clang tidy, the following recipe should work. Files will be modified in
+place, so you can use git to monitor the changes.
+
+```bash
+docker run --rm -v $PWD:/pybind11 -it silkeh/clang:10
+apt-get update && apt-get install python3-dev python3-pytest
+cmake -S pybind11/ -B build -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);-fix"
+cmake --build build
+```
+
+### Include what you use
+
+To run include what you use, install (`brew install include-what-you-use` on
+macOS), then run:
+
+```bash
+cmake -S . -B build-iwyu -DCMAKE_CXX_INCLUDE_WHAT_YOU_USE=$(which include-what-you-use)
+cmake --build build-iwyu
+```
+
+The report is sent to stderr; you can pipe it into a file if you wish.
+
+### Build recipes
+
+This builds with the Intel compiler (assuming it is in your path, along with a
+recent CMake and Python 3):
+
+```bash
+python3 -m venv venv
+. venv/bin/activate
+pip install pytest
+cmake -S . -B build-intel -DCMAKE_CXX_COMPILER=$(which icpc) -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON -DPYBIND11_WERROR=ON
+```
+
+This will test the PGI compilers:
+
+```bash
+docker run --rm -it -v $PWD:/pybind11 nvcr.io/hpc/pgi-compilers:ce
+apt-get update && apt-get install -y python3-dev python3-pip python3-pytest
+wget -qO- "https://cmake.org/files/v3.18/cmake-3.18.2-Linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local
+cmake -S pybind11/ -B build
+cmake --build build
+```
+
+### Explanation of the SDist/wheel building design
+
+> These details below are _only_ for packaging the Python sources from git. The
+> SDists and wheels created do not have any extra requirements at all and are
+> completely normal.
+
+The main objective of the packaging system is to create SDists (Python's source
+distribution packages) and wheels (Python's binary distribution packages) that
+include everything that is needed to work with pybind11, and which can be
+installed without any additional dependencies. This is more complex than it
+appears: in order to support CMake as a first class language even when using
+the PyPI package, they must include the _generated_ CMake files (so as not to
+require CMake when installing the `pybind11` package itself). They should also
+provide the option to install to the "standard" location
+(`<ENVROOT>/include/pybind11` and `<ENVROOT>/share/cmake/pybind11`) so they are
+easy to find with CMake, but this can cause problems if you are not in an
+environment or using ``pyproject.toml`` requirements. This was solved by having
+two packages; the "nice" pybind11 package that stores the includes and CMake
+files inside the package, that you get access to via functions in the package,
+and a `pybind11-global` package that can be included via `pybind11[global]` if
+you want the more invasive but discoverable file locations.
+
+If you want to install or package the GitHub source, it is best to have Pip 10
+or newer on Windows, macOS, or Linux (manylinux1 compatible, includes most
+distributions).  You can then build the SDists, or run any procedure that makes
+SDists internally, like making wheels or installing.
+
+
+```bash
+# Editable development install example
+python3 -m pip install -e .
+```
+
+Since Pip itself does not have an `sdist` command (it does have `wheel` and
+`install`), you may want to use the upcoming `build` package:
+
+```bash
+python3 -m pip install build
+
+# Normal package
+python3 -m build -s .
+
+# Global extra
+PYBIND11_GLOBAL_SDIST=1 python3 -m build -s .
+```
+
+If you want to use the classic "direct" usage of `python setup.py`, you will
+need CMake 3.15+ and either `make` or `ninja` preinstalled (possibly via `pip
+install cmake ninja`), since directly running Python on `setup.py` cannot pick
+up and install `pyproject.toml` requirements. As long as you have those two
+things, though, everything works the way you would expect:
+
+```bash
+# Normal package
+python3 setup.py sdist
+
+# Global extra
+PYBIND11_GLOBAL_SDIST=1 python3 setup.py sdist
+```
+
+A detailed explanation of the build procedure design for developers wanting to
+work on or maintain the packaging system is as follows:
+
+#### 1. Building from the source directory
+
+When you invoke any `setup.py` command from the source directory, including
+`pip wheel .` and `pip install .`, you will activate a full source build. This
+is made of the following steps:
+
+1. If the tool is PEP 518 compliant, like Pip 10+, it will create a temporary
+   virtual environment and install the build requirements (mostly CMake) into
+   it. (if you are not on Windows, macOS, or a manylinux compliant system, you
+   can disable this with `--no-build-isolation` as long as you have CMake 3.15+
+   installed)
+2. The environment variable `PYBIND11_GLOBAL_SDIST` is checked - if it is set
+   and truthy, this will make the accessory `pybind11-global` package,
+   instead of the normal `pybind11` package. This package is used for
+   installing the files directly to your environment root directory, using
+   `pybind11[global]`.
+2. `setup.py` reads the version from `pybind11/_version.py` and verifies it
+   matches `include/pybind11/detail/common.h`.
+3. CMake is run with `-DCMAKE_INSTALL_PREFIX=pybind11`. Since the CMake install
+   procedure uses only relative paths and is identical on all platforms, these
+   files are valid as long as they stay in the correct relative position to the
+   includes. `pybind11/share/cmake/pybind11` has the CMake files, and
+   `pybind11/include` has the includes. The build directory is discarded.
+4. Simpler files are placed in the SDist: `tools/setup_*.py.in`,
+   `tools/pyproject.toml` (`main` or `global`)
+5. The package is created by running the setup function in the
+   `tools/setup_*.py`.  `setup_main.py` fills in Python packages, and
+   `setup_global.py` fills in only the data/header slots.
+6. A context manager cleans up the temporary CMake install directory (even if
+   an error is thrown).
+
+#### 2. Building from SDist
+
+Since the SDist has the rendered template files in `tools` along with the
+includes and CMake files in the correct locations, the builds are completely
+trivial and simple. No extra requirements are required. You can even use Pip 9
+if you really want to.
+
+
+[pre-commit]: https://pre-commit.com
+[pybind11.readthedocs.org]: http://pybind11.readthedocs.org/en/latest
+[issue tracker]: https://github.com/pybind/pybind11/issues
+[gitter]: https://gitter.im/pybind/Lobby
+[using pull requests]: https://help.github.com/articles/using-pull-requests
diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md
new file mode 100644
index 0000000..ae36ea6
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug-report.md
@@ -0,0 +1,28 @@
+---
+name: Bug Report
+about: File an issue about a bug
+title: "[BUG] "
+---
+
+
+Make sure you've completed the following steps before submitting your issue -- thank you!
+
+1. Make sure you've read the [documentation][]. Your issue may be addressed there.
+2. Search the [issue tracker][] to verify that this hasn't already been reported. +1 or comment there if it has.
+3. Consider asking first in the [Gitter chat room][].
+4. Include a self-contained and minimal piece of code that reproduces the problem. If that's not possible, try to make the description as clear as possible.
+    a. If possible, make a PR with a new, failing test to give us a starting point to work on!
+
+[documentation]: https://pybind11.readthedocs.io
+[issue tracker]: https://github.com/pybind/pybind11/issues
+[Gitter chat room]: https://gitter.im/pybind/Lobby
+
+*After reading, remove this checklist and the template text in parentheses below.*
+
+## Issue description
+
+(Provide a short description, state the expected behavior and what actually happens.)
+
+## Reproducible example code
+
+(The code should be minimal, have no external dependencies, isolate the function(s) that cause breakage. Submit matched and complete C++ and Python snippets that can be easily compiled and run to diagnose the issue.)
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..20e7431
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,5 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Gitter room
+    url: https://gitter.im/pybind/Lobby
+    about: A room for discussing pybind11 with an active community
diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md
new file mode 100644
index 0000000..5f6ec81
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature-request.md
@@ -0,0 +1,16 @@
+---
+name: Feature Request
+about: File an issue about adding a feature
+title: "[FEAT] "
+---
+
+
+Make sure you've completed the following steps before submitting your issue -- thank you!
+
+1. Check if your feature has already been mentioned / rejected / planned in other issues.
+2. If those resources didn't help, consider asking in the [Gitter chat room][] to see if this is interesting / useful to a larger audience and possible to implement reasonably.
+3. If you have a useful feature that passes the previous items (or not suitable for chat), please fill in the details below.
+
+[Gitter chat room]: https://gitter.im/pybind/Lobby
+
+*After reading, remove this checklist.*
diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md
new file mode 100644
index 0000000..b199b6e
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/question.md
@@ -0,0 +1,21 @@
+---
+name: Question
+about: File an issue about unexplained behavior
+title: "[QUESTION] "
+---
+
+If you have a question, please check the following first:
+
+1. Check if your question has already been answered in the [FAQ][] section.
+2. Make sure you've read the [documentation][]. Your issue may be addressed there.
+3. If those resources didn't help and you only have a short question (not a bug report), consider asking in the [Gitter chat room][]
+4. Search the [issue tracker][], including the closed issues, to see if your question has already been asked/answered. +1 or comment if it has been asked but has no answer.
+5. If you have a more complex question which is not answered in the previous items (or not suitable for chat), please fill in the details below.
+6. Include a self-contained and minimal piece of code that illustrates your question. If that's not possible, try to make the description as clear as possible.
+
+[FAQ]: http://pybind11.readthedocs.io/en/latest/faq.html
+[documentation]: https://pybind11.readthedocs.io
+[issue tracker]: https://github.com/pybind/pybind11/issues
+[Gitter chat room]: https://gitter.im/pybind/Lobby
+
+*After reading, remove this checklist.*
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..c1eac3c
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,11 @@
+version: 2
+updates:
+  # Maintain dependencies for GitHub Actions
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "daily"
+    ignore:
+      # Official actions have moving tags like v1
+      # that are used, so they don't need updates here
+      - dependency-name: "actions/*"
diff --git a/.github/labeler.yml b/.github/labeler.yml
new file mode 100644
index 0000000..abb0d05
--- /dev/null
+++ b/.github/labeler.yml
@@ -0,0 +1,8 @@
+docs:
+- any:
+  - 'docs/**/*.rst'
+  - '!docs/changelog.rst'
+  - '!docs/upgrade.rst'
+
+ci:
+- '.github/workflows/*.yml'
diff --git a/.github/labeler_merged.yml b/.github/labeler_merged.yml
new file mode 100644
index 0000000..2374ad4
--- /dev/null
+++ b/.github/labeler_merged.yml
@@ -0,0 +1,3 @@
+needs changelog:
+- all:
+  - '!docs/changelog.rst'
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000..5570f6f
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,14 @@
+## Description
+
+<!-- Include relevant issues or PRs here, describe what changed and why -->
+
+
+## Suggested changelog entry:
+
+<!-- fill in the below block with the expected RestructuredText entry (delete if no entry needed) -->
+
+```rst
+
+```
+
+<!-- If the upgrade guide needs updating, note that here too -->
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..73424f9
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,676 @@
+name: CI
+
+on:
+  workflow_dispatch:
+  pull_request:
+  push:
+    branches:
+      - master
+      - stable
+      - v*
+
+jobs:
+  # This is the "main" test suite, which tests a large number of different
+  # versions of default compilers and Python versions in GitHub Actions.
+  standard:
+    strategy:
+      fail-fast: false
+      matrix:
+        runs-on: [ubuntu-latest, windows-latest, macos-latest]
+        python:
+        - 2.7
+        - 3.5
+        - 3.6
+        - 3.7
+        - 3.8
+        - 3.9
+        # - 3.10.0-alpha.1 - need next release for pybind11 fix
+        - pypy2
+        - pypy3
+
+        # Items in here will either be added to the build matrix (if not
+        # present), or add new keys to an existing matrix element if all the
+        # existing keys match.
+        #
+        # We support three optional keys: args (both build), args1 (first
+        # build), and args2 (second build).
+        include:
+          # Just add a key
+          - runs-on: ubuntu-latest
+            python: 3.6
+            args: >
+              -DPYBIND11_FINDPYTHON=ON
+          - runs-on: windows-latest
+            python: 3.6
+            args: >
+              -DPYBIND11_FINDPYTHON=ON
+          - runs-on: ubuntu-latest
+            python: 3.8
+            args: >
+              -DPYBIND11_FINDPYTHON=ON
+
+        # These items will be removed from the build matrix, keys must match.
+        exclude:
+            # Currently 32bit only, and we build 64bit
+          - runs-on: windows-latest
+            python: pypy2
+          - runs-on: windows-latest
+            python: pypy3
+
+          # Let's drop a few macOS runs since that tends to be 2.7 or 3.8+
+          - runs-on: macos-latest
+            python: 3.6
+          - runs-on: macos-latest
+            python: 3.7
+
+    name: "🐍 ${{ matrix.python }} • ${{ matrix.runs-on }} • x64 ${{ matrix.args }}"
+    runs-on: ${{ matrix.runs-on }}
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Setup Python ${{ matrix.python }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python }}
+
+    - name: Setup Boost (Windows / Linux latest)
+      shell: bash
+      run: echo "BOOST_ROOT=$BOOST_ROOT_1_72_0" >> $GITHUB_ENV
+
+    - name: Update CMake
+      uses: jwlawson/actions-setup-cmake@v1.4
+
+    - name: Cache wheels
+      if: runner.os == 'macOS'
+      uses: actions/cache@v2
+      with:
+        # This path is specific to macOS - we really only need it for PyPy NumPy wheels
+        # See https://github.com/actions/cache/blob/master/examples.md#python---pip
+        # for ways to do this more generally
+        path: ~/Library/Caches/pip
+        # Look to see if there is a cache hit for the corresponding requirements file
+        key: ${{ runner.os }}-pip-${{ matrix.python }}-x64-${{ hashFiles('tests/requirements.txt') }}
+
+    - name: Prepare env
+      run: python -m pip install -r tests/requirements.txt --prefer-binary
+
+    - name: Setup annotations on Linux
+      if: runner.os == 'Linux'
+      run: python -m pip install pytest-github-actions-annotate-failures
+
+    # First build - C++11 mode and inplace
+    - name: Configure C++11 ${{ matrix.args }}
+      run: >
+        cmake -S . -B .
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+        -DCMAKE_CXX_STANDARD=11
+        ${{ matrix.args }}
+
+    - name: Build C++11
+      run: cmake --build . -j 2
+
+    - name: Python tests C++11
+      run: cmake --build . --target pytest -j 2
+
+    - name: C++11 tests
+      # TODO: Figure out how to load the DLL on Python 3.8+
+      if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9))"
+      run: cmake --build .  --target cpptest -j 2
+
+    - name: Interface test C++11
+      run: cmake --build . --target test_cmake_build
+
+    - name: Clean directory
+      run: git clean -fdx
+
+    # Second build - C++17 mode and in a build directory
+    - name: Configure ${{ matrix.args2 }}
+      run: >
+        cmake -S . -B build2
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+        -DCMAKE_CXX_STANDARD=17
+        ${{ matrix.args }}
+        ${{ matrix.args2 }}
+
+    - name: Build
+      run: cmake --build build2 -j 2
+
+    - name: Python tests
+      run: cmake --build build2 --target pytest
+
+    - name: C++ tests
+      # TODO: Figure out how to load the DLL on Python 3.8+
+      if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9))"
+      run: cmake --build build2 --target cpptest
+
+    - name: Interface test
+      run: cmake --build build2 --target test_cmake_build
+
+    # Eventually Microsoft might have an action for setting up
+    # MSVC, but for now, this action works:
+    - name: Prepare compiler environment for Windows 🐍 2.7
+      if: matrix.python == 2.7 && runner.os == 'Windows'
+      uses: ilammy/msvc-dev-cmd@v1
+      with:
+        arch: x64
+
+    # This makes two environment variables available in the following step(s)
+    - name: Set Windows 🐍 2.7 environment variables
+      if: matrix.python == 2.7 && runner.os == 'Windows'
+      shell: bash
+      run: |
+        echo "DISTUTILS_USE_SDK=1" >> $GITHUB_ENV
+        echo "MSSdk=1" >> $GITHUB_ENV
+
+    # This makes sure the setup_helpers module can build packages using
+    # setuptools
+    - name: Setuptools helpers test
+      run: pytest tests/extra_setuptools
+
+
+  # Testing on clang using the excellent silkeh clang docker images
+  clang:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        clang:
+          - 3.6
+          - 3.7
+          - 3.9
+          - 7
+          - 9
+          - dev
+        std:
+          - 11
+        include:
+          - clang: 5
+            std: 14
+          - clang: 10
+            std: 20
+          - clang: 10
+            std: 17
+
+    name: "🐍 3 • Clang ${{ matrix.clang }} • C++${{ matrix.std }} • x64"
+    container: "silkeh/clang:${{ matrix.clang }}"
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Add wget and python3
+      run: apt-get update && apt-get install -y python3-dev python3-numpy python3-pytest libeigen3-dev
+
+    - name: Configure
+      shell: bash
+      run: >
+        cmake -S . -B build
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DCMAKE_CXX_STANDARD=${{ matrix.std }}
+        -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
+
+    - name: Build
+      run: cmake --build build -j 2
+
+    - name: Python tests
+      run: cmake --build build --target pytest
+
+    - name: C++ tests
+      run: cmake --build build --target cpptest
+
+    - name: Interface test
+      run: cmake --build build --target test_cmake_build
+
+
+  # Testing NVCC; forces sources to behave like .cu files
+  cuda:
+    runs-on: ubuntu-latest
+    name: "🐍 3.8 • CUDA 11 • Ubuntu 20.04"
+    container: nvidia/cuda:11.0-devel-ubuntu20.04
+
+    steps:
+    - uses: actions/checkout@v2
+
+    # tzdata will try to ask for the timezone, so set the DEBIAN_FRONTEND
+    - name: Install 🐍 3
+      run: apt-get update && DEBIAN_FRONTEND="noninteractive" apt-get install -y cmake git python3-dev python3-pytest python3-numpy
+
+    - name: Configure
+      run: cmake -S . -B build -DPYBIND11_CUDA_TESTS=ON -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON
+
+    - name: Build
+      run: cmake --build build -j2 --verbose
+
+    - name: Python tests
+      run: cmake --build build --target pytest
+
+
+  # Testing CentOS 8 + PGI compilers
+  centos-nvhpc8:
+    runs-on: ubuntu-latest
+    name: "🐍 3 • CentOS8 / PGI 20.7 • x64"
+    container: centos:8
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Add Python 3 and a few requirements
+      run: yum update -y && yum install -y git python3-devel python3-numpy python3-pytest make environment-modules
+
+    - name: Install CMake with pip
+      run: |
+        python3 -m pip install --upgrade pip
+        python3 -m pip install cmake --prefer-binary
+
+    - name: Install NVidia HPC SDK
+      run: yum -y install https://developer.download.nvidia.com/hpc-sdk/nvhpc-20-7-20.7-1.x86_64.rpm https://developer.download.nvidia.com/hpc-sdk/nvhpc-2020-20.7-1.x86_64.rpm
+
+    - name: Configure
+      shell: bash
+      run: |
+        source /etc/profile.d/modules.sh
+        module load /opt/nvidia/hpc_sdk/modulefiles/nvhpc/20.7
+        cmake -S . -B build -DDOWNLOAD_CATCH=ON -DCMAKE_CXX_STANDARD=14 -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
+
+    - name: Build
+      run: cmake --build build -j 2 --verbose
+
+    - name: Python tests
+      run: cmake --build build --target pytest
+
+    - name: C++ tests
+      run: cmake --build build --target cpptest
+
+    - name: Interface test
+      run: cmake --build build --target test_cmake_build
+
+
+  # Testing on CentOS 7 + PGI compilers, which seems to require more workarounds
+  centos-nvhpc7:
+    runs-on: ubuntu-latest
+    name: "🐍 3 • CentOS7 / PGI 20.9 • x64"
+    container: centos:7
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Add Python 3 and a few requirements
+      run: yum update -y && yum install -y epel-release && yum install -y git python3-devel make environment-modules cmake3
+
+    - name: Install NVidia HPC SDK
+      run:  yum -y install https://developer.download.nvidia.com/hpc-sdk/20.9/nvhpc-20-9-20.9-1.x86_64.rpm https://developer.download.nvidia.com/hpc-sdk/20.9/nvhpc-2020-20.9-1.x86_64.rpm
+
+    # On CentOS 7, we have to filter a few tests (compiler internal error)
+    # and allow deeper template recursion (not needed on CentOS 8 with a newer
+    # standard library). On some systems, you may need further workarounds:
+    # https://github.com/pybind/pybind11/pull/2475
+    - name: Configure
+      shell: bash
+      run: |
+        source /etc/profile.d/modules.sh
+        module load /opt/nvidia/hpc_sdk/modulefiles/nvhpc/20.9
+        cmake3 -S . -B build -DDOWNLOAD_CATCH=ON \
+                            -DCMAKE_CXX_STANDARD=11 \
+                            -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") \
+                            -DCMAKE_CXX_FLAGS="-Wc,--pending_instantiations=0" \
+                            -DPYBIND11_TEST_FILTER="test_smart_ptr.cpp;test_virtual_functions.cpp"
+
+    # Building before installing Pip should produce a warning but not an error
+    - name: Build
+      run: cmake3 --build build -j 2 --verbose
+
+    - name: Install pytest with pip  # CMake itself was installed as cmake3 via yum above
+      run: |
+        python3 -m pip install --upgrade pip
+        python3 -m pip install pytest
+
+    - name: Python tests
+      run: cmake3 --build build --target pytest
+
+    - name: C++ tests
+      run: cmake3 --build build --target cpptest
+
+    - name: Interface test
+      run: cmake3 --build build --target test_cmake_build
+
+  # Testing on GCC using the GCC docker images (only recent images supported)
+  gcc:  # each matrix entry runs inside the gcc Docker image with the matching tag
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        gcc:
+          - 7
+          - latest
+        std:
+          - 11
+        include:  # extra combination on top of the C++11 entries: GCC 10 built as C++20
+          - gcc: 10
+            std: 20
+
+    name: "🐍 3 • GCC ${{ matrix.gcc }} • C++${{ matrix.std }} • x64"
+    container: "gcc:${{ matrix.gcc }}"
+
+    steps:
+    - uses: actions/checkout@v1
+
+    - name: Add Python 3
+      run: apt-get update && apt-get install -y python3-dev python3-numpy python3-pytest python3-pip libeigen3-dev
+
+    - name: Update pip
+      run: python3 -m pip install --upgrade pip
+
+    - name: Setup CMake 3.18
+      uses: jwlawson/actions-setup-cmake@v1.4
+      with:
+        cmake-version: 3.18
+
+    - name: Configure  # folded scalar: the lines below are joined into one cmake command line
+      shell: bash
+      run: >
+        cmake -S . -B build
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DCMAKE_CXX_STANDARD=${{ matrix.std }}
+        -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
+
+    - name: Build
+      run: cmake --build build -j 2
+
+    - name: Python tests
+      run: cmake --build build --target pytest
+
+    - name: C++ tests
+      run: cmake --build build --target cpptest
+
+    - name: Interface test
+      run: cmake --build build --target test_cmake_build
+
+
+  # Testing on CentOS (manylinux uses a centos base, and this is an easy way
+  # to get GCC 4.8, which is the manylinux1 compiler).
+  centos:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        centos:
+          - 7  # GCC 4.8
+          - 8
+
+    name: "🐍 3 • CentOS ${{ matrix.centos }} • x64"
+    container: "centos:${{ matrix.centos }}"
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Add Python 3
+      run: yum update -y && yum install -y python3-devel gcc-c++ make git
+
+    - name: Update pip
+      run: python3 -m pip install --upgrade pip
+
+    - name: Install dependencies
+      run: python3 -m pip install cmake -r tests/requirements.txt --prefer-binary
+
+    - name: Configure
+      shell: bash
+      run: >
+        cmake -S . -B build
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+        -DCMAKE_CXX_STANDARD=11
+        -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
+
+    - name: Build
+      run: cmake --build build -j 2
+
+    - name: Python tests
+      run: cmake --build build --target pytest
+
+    - name: C++ tests
+      run: cmake --build build --target cpptest
+
+    - name: Interface test
+      run: cmake --build build --target test_cmake_build
+
+
+  # This tests an "install" with the CMake tools
+  install-classic:
+    name: "🐍 3.5 • Debian • x86 •  Install"
+    runs-on: ubuntu-latest
+    container: i386/debian:stretch
+
+    steps:
+    - uses: actions/checkout@v1
+
+    - name: Install requirements
+      run: |
+        apt-get update
+        apt-get install -y git make cmake g++ libeigen3-dev python3-dev python3-pip
+        pip3 install "pytest==3.1.*"
+
+    - name: Configure for install
+      run: >
+        cmake .
+        -DPYBIND11_INSTALL=1 -DPYBIND11_TEST=0
+        -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
+
+    - name: Make and install
+      run: make install
+
+    - name: Copy tests to new directory
+      run: cp -a tests /pybind11-tests
+
+    - name: Make a new test directory
+      run: mkdir /build-tests
+
+    - name: Configure tests
+      run: >
+        cmake ../pybind11-tests
+        -DDOWNLOAD_CATCH=ON
+        -DPYBIND11_WERROR=ON
+        -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
+      working-directory: /build-tests
+
+    - name: Run tests
+      run: make pytest -j 2
+      working-directory: /build-tests
+
+
+  # This verifies that the documentation is not horribly broken, and does a
+  # basic sanity check on the SDist.
+  doxygen:
+    name: "Documentation build test"
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - uses: actions/setup-python@v2
+
+    - name: Install Doxygen  # refresh the package index first so the install does not hit stale mirrors
+      run: sudo apt-get update && sudo apt-get install -y doxygen librsvg2-bin # Changed to rsvg-convert in 20.04
+
+    - name: Install docs & setup requirements
+      run: python3 -m pip install -r docs/requirements.txt
+
+    - name: Build docs
+      run: python3 -m sphinx -W -b html docs docs/.build
+
+    - name: Make SDist
+      run: python3 setup.py sdist
+
+    - run: git status --ignored
+
+    - name: Check local include dir
+      run: >
+        ls pybind11;
+        python3 -c "import pybind11, pathlib; assert (a := pybind11.get_include()) == (b := str(pathlib.Path('include').resolve())), f'{a} != {b}'"
+
+    - name: Compare Dists (headers only)
+      working-directory: include
+      run: |
+        python3 -m pip install --user -U ../dist/*
+        installed=$(python3 -c "import pybind11; print(pybind11.get_include() + '/pybind11')")
+        diff -rq $installed ./pybind11
+
+  win32:
+    strategy:
+      fail-fast: false
+      matrix:
+        python:
+        - 3.5
+        - 3.6
+        - 3.7
+        - 3.8
+        - 3.9
+        - pypy3
+        # TODO: fix hang on pypy2
+
+        include:
+          - python: 3.9
+            args: -DCMAKE_CXX_STANDARD=20 -DDOWNLOAD_EIGEN=OFF
+          - python: 3.8
+            args: -DCMAKE_CXX_STANDARD=17
+
+    name: "🐍 ${{ matrix.python }} • MSVC 2019 • x86 ${{ matrix.args }}"
+    runs-on: windows-latest
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Setup Python ${{ matrix.python }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python }}
+        architecture: x86
+
+    - name: Update CMake
+      uses: jwlawson/actions-setup-cmake@v1.4
+
+    - name: Prepare MSVC
+      uses: ilammy/msvc-dev-cmd@v1
+      with:
+        arch: x86
+
+    - name: Prepare env
+      run: python -m pip install -r tests/requirements.txt --prefer-binary
+
+    # First build - C++11 mode and inplace
+    - name: Configure ${{ matrix.args }}
+      run: >
+        cmake -S . -B build
+        -G "Visual Studio 16 2019" -A Win32
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+        ${{ matrix.args }}
+    - name: Build C++11
+      run: cmake --build build -j 2
+
+    - name: Run tests
+      run: cmake --build build -t pytest
+
+  win32-msvc2015:
+    name: "🐍 ${{ matrix.python }} • MSVC 2015 • x64"
+    runs-on: windows-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python:
+          - 2.7
+          - 3.6
+          - 3.7
+          # todo: check/cpptest does not support 3.8+ yet
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Setup 🐍 ${{ matrix.python }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python }}
+
+    - name: Update CMake
+      uses: jwlawson/actions-setup-cmake@v1.4
+
+    - name: Prepare MSVC
+      uses: ilammy/msvc-dev-cmd@v1
+      with:
+        toolset: 14.0
+
+    - name: Prepare env
+      run: python -m pip install -r tests/requirements.txt --prefer-binary
+
+    # First build - C++11 mode and inplace
+    - name: Configure
+      run: >
+        cmake -S . -B build
+        -G "Visual Studio 14 2015" -A x64
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+
+    - name: Build C++14
+      run: cmake --build build -j 2
+
+    - name: Run all checks
+      run: cmake --build build -t check
+
+
+  win32-msvc2017:
+    name: "🐍 ${{ matrix.python }} • MSVC 2017 • x64"
+    runs-on: windows-2016
+    strategy:
+      fail-fast: false
+      matrix:
+        python:
+          - 2.7
+          - 3.5
+          - 3.7
+        std:
+          - 14
+
+        include:
+          - python: 2.7
+            std: 17
+            args: >
+              -DCMAKE_CXX_FLAGS="/permissive- /EHsc /GR"
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Setup 🐍 ${{ matrix.python }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python }}
+
+    - name: Update CMake
+      uses: jwlawson/actions-setup-cmake@v1.4
+
+    - name: Prepare env
+      run: python -m pip install -r tests/requirements.txt --prefer-binary
+
+    # First build - C++11 mode and inplace
+    - name: Configure
+      run: >
+        cmake -S . -B build
+        -G "Visual Studio 15 2017" -A x64
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+        -DCMAKE_CXX_STANDARD=${{ matrix.std }}
+        ${{ matrix.args }}
+
+    - name: Build ${{ matrix.std }}
+      run: cmake --build build -j 2
+
+    - name: Run all checks
+      run: cmake --build build -t check
diff --git a/.github/workflows/configure.yml b/.github/workflows/configure.yml
new file mode 100644
index 0000000..23f6022
--- /dev/null
+++ b/.github/workflows/configure.yml
@@ -0,0 +1,84 @@
+name: Config
+
+on:
+  workflow_dispatch:
+  pull_request:
+  push:
+    branches:
+      - master
+      - stable
+      - v*
+
+jobs:
+  # This tests various versions of CMake in various combinations, to make sure
+  # the configure step passes.
+  cmake:
+    strategy:
+      fail-fast: false
+      matrix:
+        runs-on: [ubuntu-latest, macos-latest, windows-latest]
+        arch: [x64]
+        cmake: [3.18]
+
+        include:
+        - runs-on: ubuntu-latest
+          arch: x64
+          cmake: 3.4
+
+        - runs-on: macos-latest
+          arch: x64
+          cmake: 3.7
+
+        - runs-on: windows-2016
+          arch: x86
+          cmake: 3.8
+
+        - runs-on: windows-2016
+          arch: x86
+          cmake: 3.18
+
+    name: 🐍 3.7 • CMake ${{ matrix.cmake }} • ${{ matrix.runs-on }}
+    runs-on: ${{ matrix.runs-on }}
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Setup Python 3.7
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.7
+        architecture: ${{ matrix.arch }}
+
+    - name: Prepare env
+      run: python -m pip install -r tests/requirements.txt
+
+    # An action for adding a specific version of CMake:
+    #   https://github.com/jwlawson/actions-setup-cmake
+    - name: Setup CMake ${{ matrix.cmake }}
+      uses: jwlawson/actions-setup-cmake@v1.3
+      with:
+        cmake-version: ${{ matrix.cmake }}
+
+    # These steps use a directory with a space in it intentionally
+    - name: Make build directories
+      run: mkdir "build dir"
+
+    - name: Configure
+      working-directory: build dir
+      shell: bash
+      run: >
+        cmake ..
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DPYTHON_EXECUTABLE=$(python -c "import sys; print(sys.executable)")
+
+    # Only build and test if this was manually triggered in the GitHub UI
+    - name: Build
+      working-directory: build dir
+      if: github.event_name == 'workflow_dispatch'
+      run: cmake --build . --config Release
+
+    - name: Test
+      working-directory: build dir
+      if: github.event_name == 'workflow_dispatch'
+      run: cmake --build . --config Release --target check
diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
new file mode 100644
index 0000000..5cebed1
--- /dev/null
+++ b/.github/workflows/format.yml
@@ -0,0 +1,46 @@
+# This is a format job. Pre-commit has a first-party GitHub action, so we use
+# that: https://github.com/pre-commit/action
+
+name: Format
+
+on:
+  workflow_dispatch:
+  pull_request:
+  push:
+    branches:
+    - master
+    - stable
+    - "v*"
+
+jobs:
+  pre-commit:
+    name: Format
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions/setup-python@v2
+    - uses: pre-commit/action@v2.0.0
+      with:
+        # Slow hooks are marked with manual - slow is okay here, run them too
+        extra_args: --hook-stage manual --all-files
+
+  clang-tidy:
+    name: Clang-Tidy
+    runs-on: ubuntu-latest
+    container: silkeh/clang:10
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Install requirements
+      run: apt-get update && apt-get install -y python3-dev python3-pytest
+
+    - name: Configure
+      run: >
+        cmake -S . -B build
+        -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);--warnings-as-errors=*"
+        -DDOWNLOAD_EIGEN=ON
+        -DDOWNLOAD_CATCH=ON
+        -DCMAKE_CXX_STANDARD=17
+
+    - name: Build
+      run: cmake --build build -j 2
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
new file mode 100644
index 0000000..d2b5979
--- /dev/null
+++ b/.github/workflows/labeler.yml
@@ -0,0 +1,16 @@
+name: Labeler
+on:
+  pull_request_target:
+    types: [closed]
+
+jobs:
+  label:  # runs on the pull_request_target "closed" event declared in this workflow's trigger
+    name: Labeler
+    runs-on: ubuntu-latest
+    steps:
+
+    - uses: actions/labeler@main  # NOTE(review): floating ref; consider pinning to a release tag or commit SHA
+      if: github.event.pull_request.merged == true  # skip pull requests closed without being merged
+      with:
+        repo-token: ${{ secrets.GITHUB_TOKEN }}
+        configuration-path: .github/labeler_merged.yml
diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml
new file mode 100644
index 0000000..4414a12
--- /dev/null
+++ b/.github/workflows/pip.yml
@@ -0,0 +1,103 @@
+name: Pip
+
+on:
+  workflow_dispatch:
+  pull_request:
+  push:
+    branches:
+    - master
+    - stable
+    - v*
+  release:
+    types:
+    - published
+
+jobs:
+  # This builds the sdists and wheels and makes sure the files are exactly as
+  # expected. Using Windows and Python 2.7, since that is often the most
+  # challenging matrix element.
+  test-packaging:
+    name: 🐍 2.7 • 📦 tests • windows-latest
+    runs-on: windows-latest
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Setup 🐍 2.7
+      uses: actions/setup-python@v2
+      with:
+        python-version: 2.7
+
+    - name: Prepare env
+      run: python -m pip install -r tests/requirements.txt --prefer-binary
+
+    - name: Python Packaging tests
+      run: pytest tests/extra_python_package/
+
+
+  # This runs the packaging tests and also builds and saves the packages as
+  # artifacts.
+  packaging:
+    name: 🐍 3.8 • 📦 & 📦 tests • ubuntu-latest
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Setup 🐍 3.8
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.8
+
+    - name: Prepare env
+      run: python -m pip install -r tests/requirements.txt build twine --prefer-binary
+
+    - name: Python Packaging tests
+      run: pytest tests/extra_python_package/
+
+    - name: Build SDist and wheels
+      run: |
+        python -m build
+        PYBIND11_GLOBAL_SDIST=1 python -m build
+
+    - name: Check metadata
+      run: twine check dist/*
+
+    - name: Save standard package
+      uses: actions/upload-artifact@v2
+      with:
+        name: standard
+        path: dist/pybind11-*
+
+    - name: Save global package
+      uses: actions/upload-artifact@v2
+      with:
+        name: global
+        path: dist/pybind11_global-*
+
+
+
+  # When a GitHub release is made, upload the artifacts to PyPI
+  upload:
+    name: Upload to PyPI
+    runs-on: ubuntu-latest
+    if: github.event_name == 'release' && github.event.action == 'published'
+    needs: [packaging]
+
+    steps:
+    - uses: actions/setup-python@v2
+
+    # Downloads all to directories matching the artifact names
+    - uses: actions/download-artifact@v2
+
+    - name: Publish standard package
+      uses: pypa/gh-action-pypi-publish@v1.4.1
+      with:
+        password: ${{ secrets.pypi_password }}
+        packages_dir: standard/
+
+    - name: Publish global package
+      uses: pypa/gh-action-pypi-publish@v1.4.1
+      with:
+        password: ${{ secrets.pypi_password_global }}
+        packages_dir: global/
diff --git a/.gitignore b/.gitignore
index 979fd44..3f36b89 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 CMakeFiles
 Makefile
 cmake_install.cmake
+cmake_uninstall.cmake
 .DS_Store
 *.so
 *.pyd
@@ -10,6 +11,7 @@
 *.sdf
 *.opensdf
 *.vcxproj
+*.vcxproj.user
 *.filters
 example.dir
 Win32
@@ -30,9 +32,12 @@
 .*.swp
 .DS_Store
 /dist
-/build
-/cmake/
+/*build*
 .cache/
 sosize-*.txt
 pybind11Config*.cmake
 pybind11Targets.cmake
+/*env*
+/.vscode
+/pybind11/include/*
+/pybind11/share/*
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index d063a8e..0000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "tools/clang"]
-	path = tools/clang
-	url = ../../wjakob/clang-cindex-python3
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..85254a8
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,100 @@
+# To use:
+#
+#     pre-commit run -a
+#
+# Or:
+#
+#     pre-commit install  # (runs every time you commit in git)
+#
+# To update this file:
+#
+#     pre-commit autoupdate
+#
+# See https://github.com/pre-commit/pre-commit
+
+repos:
+# Standard hooks
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v3.2.0
+  hooks:
+  - id: check-added-large-files
+  - id: check-case-conflict
+  - id: check-merge-conflict
+  - id: check-symlinks
+  - id: check-yaml
+  - id: debug-statements
+  - id: end-of-file-fixer
+  - id: mixed-line-ending
+  - id: requirements-txt-fixer
+  - id: trailing-whitespace
+  - id: fix-encoding-pragma
+
+# Black, the code formatter, natively supports pre-commit
+- repo: https://github.com/psf/black
+  rev: 20.8b1
+  hooks:
+  - id: black
+    # By default, this ignores pyi files, though black supports them
+    types: [text]
+    files: \.pyi?$
+
+# Changes tabs to spaces
+- repo: https://github.com/Lucas-C/pre-commit-hooks
+  rev: v1.1.9
+  hooks:
+  - id: remove-tabs
+
+# Flake8 also supports pre-commit natively (same author)
+- repo: https://gitlab.com/pycqa/flake8
+  rev: 3.8.3
+  hooks:
+  - id: flake8
+    additional_dependencies: [flake8-bugbear, pep8-naming]
+    exclude: ^(docs/.*|tools/.*)$
+
+# CMake formatting (*.cmake and CMakeLists.txt, optionally with a .in suffix)
+- repo: https://github.com/cheshirekow/cmake-format-precommit
+  rev: v0.6.13
+  hooks:
+  - id: cmake-format
+    additional_dependencies: [pyyaml]
+    types: [file]
+    files: (\.cmake|CMakeLists\.txt)(\.in)?$
+
+# Check static types with mypy
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v0.790
+  hooks:
+  - id: mypy
+    # The default Python type ignores .pyi files, so let's rerun if detected
+    types: [text]
+    files: ^pybind11.*\.pyi?$
+    # Running per-file misbehaves a bit, so just run on all files, it's fast
+    pass_filenames: false
+
+# Checks the manifest for missing files (native support)
+- repo: https://github.com/mgedmin/check-manifest
+  rev: "0.43"
+  hooks:
+  - id: check-manifest
+    # This is a slow hook, so only run this if --hook-stage manual is passed
+    stages: [manual]
+    additional_dependencies: [cmake, ninja]
+
+# The original pybind11 checks for a few C++ style items
+- repo: local
+  hooks:
+  - id: disallow-caps
+    name: Disallow improper capitalization
+    language: pygrep
+    entry: PyBind|Numpy|Cmake|CCache
+    exclude: .pre-commit-config.yaml
+
+- repo: local
+  hooks:
+  - id: check-style
+    name: Classic check-style
+    language: system
+    types:
+    - c++
+    entry: ./tools/check-style.sh
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index d81cd8c..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,306 +0,0 @@
-language: cpp
-matrix:
-  include:
-  # This config does a few things:
-  # - Checks C++ and Python code styles (check-style.sh and flake8).
-  # - Makes sure sphinx can build the docs without any errors or warnings.
-  # - Tests setup.py sdist and install (all header files should be present).
-  # - Makes sure that everything still works without optional deps (numpy/scipy/eigen) and
-  #   also tests the automatic discovery functions in CMake (Python version, C++ standard).
-  - os: linux
-    dist: xenial # Necessary to run doxygen 1.8.15
-    name: Style, docs, and pip
-    cache: false
-    before_install:
-    - pyenv global $(pyenv whence 2to3)  # activate all python versions
-    - PY_CMD=python3
-    - $PY_CMD -m pip install --user --upgrade pip wheel setuptools
-    install:
-    # breathe 4.14 doesn't work with bit fields. See https://github.com/michaeljones/breathe/issues/462
-    - $PY_CMD -m pip install --user --upgrade sphinx sphinx_rtd_theme breathe==4.13.1 flake8 pep8-naming pytest
-    - curl -fsSL https://sourceforge.net/projects/doxygen/files/rel-1.8.15/doxygen-1.8.15.linux.bin.tar.gz/download | tar xz
-    - export PATH="$PWD/doxygen-1.8.15/bin:$PATH"
-    script:
-    - tools/check-style.sh
-    - flake8
-    - $PY_CMD -m sphinx -W -b html docs docs/.build
-    - |
-      # Make sure setup.py distributes and installs all the headers
-      $PY_CMD setup.py sdist
-      $PY_CMD -m pip install --user -U ./dist/*
-      installed=$($PY_CMD -c "import pybind11; print(pybind11.get_include(True) + '/pybind11')")
-      diff -rq $installed ./include/pybind11
-    - |
-      # Barebones build
-      cmake -DCMAKE_BUILD_TYPE=Debug -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -DPYTHON_EXECUTABLE=$(which $PY_CMD) .
-      make pytest -j 2 && make cpptest -j 2
-  # The following are regular test configurations, including optional dependencies.
-  # With regard to each other they differ in Python version, C++ standard and compiler.
-  - os: linux
-    dist: trusty
-    name: Python 2.7, c++11, gcc 4.8
-    env: PYTHON=2.7 CPP=11 GCC=4.8
-    addons:
-      apt:
-        packages:
-          - cmake=2.\*
-          - cmake-data=2.\*
-  - os: linux
-    dist: trusty
-    name: Python 3.6, c++11, gcc 4.8
-    env: PYTHON=3.6 CPP=11 GCC=4.8
-    addons:
-      apt:
-        sources:
-          - deadsnakes
-        packages:
-          - python3.6-dev
-          - python3.6-venv
-          - cmake=2.\*
-          - cmake-data=2.\*
-  - os: linux
-    dist: trusty
-    env: PYTHON=2.7 CPP=14 GCC=6 CMAKE=1
-    name: Python 2.7, c++14, gcc 6, CMake test
-    addons:
-      apt:
-        sources:
-          - ubuntu-toolchain-r-test
-        packages:
-          - g++-6
-  - os: linux
-    dist: trusty
-    name: Python 3.5, c++14, gcc 6, Debug build
-    # N.B. `ensurepip` could be installed transitively by `python3.5-venv`, but
-    # seems to have apt conflicts (at least for Trusty). Use Docker instead.
-    services: docker
-    env: DOCKER=debian:stretch PYTHON=3.5 CPP=14 GCC=6 DEBUG=1
-  - os: linux
-    dist: xenial
-    env: PYTHON=3.6 CPP=17 GCC=7
-    name: Python 3.6, c++17, gcc 7
-    addons:
-      apt:
-        sources:
-          - deadsnakes
-          - ubuntu-toolchain-r-test
-        packages:
-          - g++-7
-          - python3.6-dev
-          - python3.6-venv
-  - os: linux
-    dist: xenial
-    env: PYTHON=3.6 CPP=17 CLANG=7
-    name: Python 3.6, c++17, Clang 7
-    addons:
-      apt:
-        sources:
-          - deadsnakes
-          - llvm-toolchain-xenial-7
-        packages:
-          - python3.6-dev
-          - python3.6-venv
-          - clang-7
-          - libclang-7-dev
-          - llvm-7-dev
-          - lld-7
-          - libc++-7-dev
-          - libc++abi-7-dev  # Why is this necessary???
-  - os: linux
-    dist: xenial
-    env: PYTHON=3.8 CPP=17 GCC=7
-    name: Python 3.8, c++17, gcc 7 (w/o numpy/scipy) # TODO: update build name when the numpy/scipy wheels become available
-    addons:
-      apt:
-        sources:
-          - deadsnakes
-          - ubuntu-toolchain-r-test
-        packages:
-          - g++-7
-          - python3.8-dev
-          - python3.8-venv
-    # Currently there is no numpy/scipy wheels available for python3.8
-    # TODO: remove next before_install, install and script clause when the wheels become available
-    before_install:
-      - pyenv global $(pyenv whence 2to3)  # activate all python versions
-      - PY_CMD=python3
-      - $PY_CMD -m pip install --user --upgrade pip wheel setuptools
-    install:
-      - $PY_CMD -m pip install --user --upgrade pytest
-    script:
-      - |
-        # Barebones build
-        cmake -DCMAKE_BUILD_TYPE=Debug -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -DPYTHON_EXECUTABLE=$(which $PY_CMD) .
-        make pytest -j 2 && make cpptest -j 2
-  - os: osx
-    name: Python 2.7, c++14, AppleClang 7.3, CMake test
-    osx_image: xcode7.3
-    env: PYTHON=2.7 CPP=14 CLANG CMAKE=1
-  - os: osx
-    name: Python 3.7, c++14, AppleClang 9, Debug build
-    osx_image: xcode9.4
-    env: PYTHON=3.7 CPP=14 CLANG DEBUG=1
-  # Test a PyPy 2.7 build
-  - os: linux
-    dist: trusty
-    env: PYPY=5.8 PYTHON=2.7 CPP=11 GCC=4.8
-    name: PyPy 5.8, Python 2.7, c++11, gcc 4.8
-    addons:
-      apt:
-        packages:
-          - libblas-dev
-          - liblapack-dev
-          - gfortran
-  # Build in 32-bit mode and tests against the CMake-installed version
-  - os: linux
-    dist: trusty
-    services: docker
-    env: DOCKER=i386/debian:stretch PYTHON=3.5 CPP=14 GCC=6 INSTALL=1
-    name: Python 3.5, c++14, gcc 6, 32-bit
-    script:
-      - |
-        # Consolidated 32-bit Docker Build + Install
-        set -ex
-        $SCRIPT_RUN_PREFIX sh -c "
-          set -ex
-          cmake ${CMAKE_EXTRA_ARGS} -DPYBIND11_INSTALL=1 -DPYBIND11_TEST=0 .
-          make install
-          cp -a tests /pybind11-tests
-          mkdir /build-tests && cd /build-tests
-          cmake ../pybind11-tests ${CMAKE_EXTRA_ARGS} -DPYBIND11_WERROR=ON
-          make pytest -j 2"
-        set +ex
-cache:
-  directories:
-  - $HOME/.local/bin
-  - $HOME/.local/lib
-  - $HOME/.local/include
-  - $HOME/Library/Python
-before_install:
-- |
-  # Configure build variables
-  set -ex
-  if [ "$TRAVIS_OS_NAME" = "linux" ]; then
-    if [ -n "$CLANG" ]; then
-      export CXX=clang++-$CLANG CC=clang-$CLANG
-      EXTRA_PACKAGES+=" clang-$CLANG llvm-$CLANG-dev"
-    else
-      if [ -z "$GCC" ]; then GCC=4.8
-      else EXTRA_PACKAGES+=" g++-$GCC"
-      fi
-      export CXX=g++-$GCC CC=gcc-$GCC
-    fi
-  elif [ "$TRAVIS_OS_NAME" = "osx" ]; then
-    export CXX=clang++ CC=clang;
-  fi
-  if [ -n "$CPP" ]; then CPP=-std=c++$CPP; fi
-  if [ "${PYTHON:0:1}" = "3" ]; then PY=3; fi
-  if [ -n "$DEBUG" ]; then CMAKE_EXTRA_ARGS+=" -DCMAKE_BUILD_TYPE=Debug"; fi
-  set +ex
-- |
-  # Initialize environment
-  set -ex
-  if [ -n "$DOCKER" ]; then
-    docker pull $DOCKER
-
-    containerid=$(docker run --detach --tty \
-      --volume="$PWD":/pybind11 --workdir=/pybind11 \
-      --env="CC=$CC" --env="CXX=$CXX" --env="DEBIAN_FRONTEND=$DEBIAN_FRONTEND" \
-      --env=GCC_COLORS=\  \
-      $DOCKER)
-    SCRIPT_RUN_PREFIX="docker exec --tty $containerid"
-    $SCRIPT_RUN_PREFIX sh -c 'for s in 0 15; do sleep $s; apt-get update && apt-get -qy dist-upgrade && break; done'
-  else
-    if [ "$PYPY" = "5.8" ]; then
-      curl -fSL https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.8.0-linux64.tar.bz2 | tar xj
-      PY_CMD=$(echo `pwd`/pypy2-v5.8.0-linux64/bin/pypy)
-      CMAKE_EXTRA_ARGS+=" -DPYTHON_EXECUTABLE:FILEPATH=$PY_CMD"
-    else
-      PY_CMD=python$PYTHON
-      if [ "$TRAVIS_OS_NAME" = "osx" ]; then
-        if [ "$PY" = "3" ]; then
-          brew update && brew unlink python@2 && brew upgrade python
-        else
-          curl -fsSL https://bootstrap.pypa.io/get-pip.py | $PY_CMD - --user
-        fi
-      fi
-    fi
-    if [ "$PY" = 3 ] || [ -n "$PYPY" ]; then
-      $PY_CMD -m ensurepip --user
-    fi
-    $PY_CMD --version
-    $PY_CMD -m pip install --user --upgrade pip wheel
-  fi
-  set +ex
-install:
-- |
-  # Install dependencies
-  set -ex
-  cmake --version
-  if [ -n "$DOCKER" ]; then
-    if [ -n "$DEBUG" ]; then
-      PY_DEBUG="python$PYTHON-dbg python$PY-scipy-dbg"
-      CMAKE_EXTRA_ARGS+=" -DPYTHON_EXECUTABLE=/usr/bin/python${PYTHON}dm"
-    fi
-    $SCRIPT_RUN_PREFIX sh -c "for s in 0 15; do sleep \$s; \
-      apt-get -qy --no-install-recommends install \
-        $PY_DEBUG python$PYTHON-dev python$PY-pytest python$PY-scipy \
-        libeigen3-dev libboost-dev cmake make ${EXTRA_PACKAGES} && break; done"
-  else
-
-    if [ "$CLANG" = "7" ]; then
-      export CXXFLAGS="-stdlib=libc++"
-    fi
-
-    export NPY_NUM_BUILD_JOBS=2
-    echo "Installing pytest, numpy, scipy..."
-    local PIP_CMD=""
-    if [ -n $PYPY ]; then
-      # For expediency, install only versions that are available on the extra index.
-      travis_wait 30 \
-        $PY_CMD -m pip install --user --upgrade --extra-index-url https://imaginary.ca/trusty-pypi \
-          pytest numpy==1.15.4 scipy==1.2.0
-    else
-      $PY_CMD -m pip install --user --upgrade pytest numpy scipy
-    fi
-    echo "done."
-
-    mkdir eigen
-    curl -fsSL https://bitbucket.org/eigen/eigen/get/3.3.4.tar.bz2 | \
-        tar --extract -j --directory=eigen --strip-components=1
-    export CMAKE_INCLUDE_PATH="${CMAKE_INCLUDE_PATH:+$CMAKE_INCLUDE_PATH:}$PWD/eigen"
-  fi
-  set +ex
-script:
-- |
-  # CMake Configuration
-  set -ex
-  $SCRIPT_RUN_PREFIX cmake ${CMAKE_EXTRA_ARGS} \
-    -DPYBIND11_PYTHON_VERSION=$PYTHON \
-    -DPYBIND11_CPP_STANDARD=$CPP \
-    -DPYBIND11_WERROR=${WERROR:-ON} \
-    -DDOWNLOAD_CATCH=${DOWNLOAD_CATCH:-ON} \
-    .
-  set +ex
-- |
-  # pytest
-  set -ex
-  $SCRIPT_RUN_PREFIX make pytest -j 2 VERBOSE=1
-  set +ex
-- |
-  # cpptest
-  set -ex
-  $SCRIPT_RUN_PREFIX make cpptest -j 2
-  set +ex
-- |
-  # CMake Build Interface
-  set -ex
-  if [ -n "$CMAKE" ]; then $SCRIPT_RUN_PREFIX make test_cmake_build; fi
-  set +ex
-after_failure: cat tests/test_cmake_build/*.log*
-after_script:
-- |
-  # Cleanup (Docker)
-  set -ex
-  if [ -n "$DOCKER" ]; then docker stop "$containerid"; docker rm "$containerid"; fi
-  set +ex
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 85ecd90..2c08ff0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,153 +5,266 @@
 # All rights reserved. Use of this source code is governed by a
 # BSD-style license that can be found in the LICENSE file.
 
-cmake_minimum_required(VERSION 2.8.12)
+cmake_minimum_required(VERSION 3.4)
 
-if (POLICY CMP0048)
-  # cmake warns if loaded from a min-3.0-required parent dir, so silence the warning:
-  cmake_policy(SET CMP0048 NEW)
+# The `cmake_minimum_required(VERSION 3.4...3.18)` syntax does not work with
+# some versions of VS that have a patched CMake 3.11. This forces us to emulate
+# the behavior using the following workaround:
+if(${CMAKE_VERSION} VERSION_LESS 3.18)
+  cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION})
+else()
+  cmake_policy(VERSION 3.18)
 endif()
 
-# CMake versions < 3.4.0 do not support try_compile/pthread checks without C as active language.
-if(CMAKE_VERSION VERSION_LESS 3.4.0)
-  project(pybind11)
-else()
-  project(pybind11 CXX)
+# Extract project version from source
+file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/include/pybind11/detail/common.h"
+     pybind11_version_defines REGEX "#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) ")
+
+foreach(ver ${pybind11_version_defines})
+  if(ver MATCHES [[#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) +([^ ]+)$]])
+    set(PYBIND11_VERSION_${CMAKE_MATCH_1} "${CMAKE_MATCH_2}")
+  endif()
+endforeach()
+
+if(PYBIND11_VERSION_PATCH MATCHES [[\.([a-zA-Z0-9]+)$]])
+  set(pybind11_VERSION_TYPE "${CMAKE_MATCH_1}")
+endif()
+string(REGEX MATCH "^[0-9]+" PYBIND11_VERSION_PATCH "${PYBIND11_VERSION_PATCH}")
+
+project(
+  pybind11
+  LANGUAGES CXX
+  VERSION "${PYBIND11_VERSION_MAJOR}.${PYBIND11_VERSION_MINOR}.${PYBIND11_VERSION_PATCH}")
+
+# Standard includes
+include(GNUInstallDirs)
+include(CMakePackageConfigHelpers)
+include(CMakeDependentOption)
+
+if(NOT pybind11_FIND_QUIETLY)
+  message(STATUS "pybind11 v${pybind11_VERSION} ${pybind11_VERSION_TYPE}")
 endif()
 
 # Check if pybind11 is being used directly or via add_subdirectory
-set(PYBIND11_MASTER_PROJECT OFF)
-if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
+if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR)
+  ### Warn if this is not an out-of-source build
+  if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
+    set(lines
+        "You are building in-place. If that is not what you intended to "
+        "do, you can clean the source directory with:\n"
+        "rm -r CMakeCache.txt CMakeFiles/ cmake_uninstall.cmake pybind11Config.cmake "
+        "pybind11ConfigVersion.cmake tests/CMakeFiles/\n")
+    message(AUTHOR_WARNING ${lines})
+  endif()
+
   set(PYBIND11_MASTER_PROJECT ON)
+
+  if(APPLE AND CMAKE_VERSION VERSION_LESS 3.7)
+    # CMake < 3.7 on macOS is unable to download Catch (APPLE is the variable CMake sets there)
+    message(WARNING "CMAKE 3.7+ needed on macOS to download catch, and newer HIGHLY recommended")
+  elseif(WIN32 AND CMAKE_VERSION VERSION_LESS 3.8)
+    # Only tested with 3.8+ in CI (WIN32 is the variable CMake sets for Windows targets)
+    message(WARNING "CMAKE 3.8+ tested on Windows, previous versions untested")
+  endif()
+
+  message(STATUS "CMake ${CMAKE_VERSION}")
+
+  if(CMAKE_CXX_STANDARD)
+    set(CMAKE_CXX_EXTENSIONS OFF)
+    set(CMAKE_CXX_STANDARD_REQUIRED ON)
+  endif()
+else()
+  set(PYBIND11_MASTER_PROJECT OFF)
+  set(pybind11_system SYSTEM)
 endif()
 
+# Options
 option(PYBIND11_INSTALL "Install pybind11 header files?" ${PYBIND11_MASTER_PROJECT})
-option(PYBIND11_TEST    "Build pybind11 test suite?"     ${PYBIND11_MASTER_PROJECT})
+option(PYBIND11_TEST "Build pybind11 test suite?" ${PYBIND11_MASTER_PROJECT})
+option(PYBIND11_NOPYTHON "Disable search for Python" OFF)
 
-list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/tools")
+cmake_dependent_option(
+  USE_PYTHON_INCLUDE_DIR
+  "Install pybind11 headers in Python include directory instead of default installation prefix"
+  OFF "PYBIND11_INSTALL" OFF)
 
-include(pybind11Tools)
-
-# Cache variables so pybind11_add_module can be used in parent projects
-set(PYBIND11_INCLUDE_DIR "${CMAKE_CURRENT_LIST_DIR}/include" CACHE INTERNAL "")
-set(PYTHON_INCLUDE_DIRS ${PYTHON_INCLUDE_DIRS} CACHE INTERNAL "")
-set(PYTHON_LIBRARIES ${PYTHON_LIBRARIES} CACHE INTERNAL "")
-set(PYTHON_MODULE_PREFIX ${PYTHON_MODULE_PREFIX} CACHE INTERNAL "")
-set(PYTHON_MODULE_EXTENSION ${PYTHON_MODULE_EXTENSION} CACHE INTERNAL "")
-set(PYTHON_VERSION_MAJOR ${PYTHON_VERSION_MAJOR} CACHE INTERNAL "")
-set(PYTHON_VERSION_MINOR ${PYTHON_VERSION_MINOR} CACHE INTERNAL "")
+cmake_dependent_option(PYBIND11_FINDPYTHON "Force new FindPython" OFF
+                       "NOT CMAKE_VERSION VERSION_LESS 3.12" OFF)
 
 # NB: when adding a header don't forget to also add it to setup.py
 set(PYBIND11_HEADERS
-  include/pybind11/detail/class.h
-  include/pybind11/detail/common.h
-  include/pybind11/detail/descr.h
-  include/pybind11/detail/init.h
-  include/pybind11/detail/internals.h
-  include/pybind11/detail/typeid.h
-  include/pybind11/attr.h
-  include/pybind11/buffer_info.h
-  include/pybind11/cast.h
-  include/pybind11/chrono.h
-  include/pybind11/common.h
-  include/pybind11/complex.h
-  include/pybind11/options.h
-  include/pybind11/eigen.h
-  include/pybind11/embed.h
-  include/pybind11/eval.h
-  include/pybind11/functional.h
-  include/pybind11/numpy.h
-  include/pybind11/operators.h
-  include/pybind11/pybind11.h
-  include/pybind11/pytypes.h
-  include/pybind11/stl.h
-  include/pybind11/stl_bind.h
-)
-string(REPLACE "include/" "${CMAKE_CURRENT_SOURCE_DIR}/include/"
-       PYBIND11_HEADERS "${PYBIND11_HEADERS}")
+    include/pybind11/detail/class.h
+    include/pybind11/detail/common.h
+    include/pybind11/detail/descr.h
+    include/pybind11/detail/init.h
+    include/pybind11/detail/internals.h
+    include/pybind11/detail/typeid.h
+    include/pybind11/attr.h
+    include/pybind11/buffer_info.h
+    include/pybind11/cast.h
+    include/pybind11/chrono.h
+    include/pybind11/common.h
+    include/pybind11/complex.h
+    include/pybind11/options.h
+    include/pybind11/eigen.h
+    include/pybind11/embed.h
+    include/pybind11/eval.h
+    include/pybind11/iostream.h
+    include/pybind11/functional.h
+    include/pybind11/numpy.h
+    include/pybind11/operators.h
+    include/pybind11/pybind11.h
+    include/pybind11/pytypes.h
+    include/pybind11/stl.h
+    include/pybind11/stl_bind.h)
 
-if (PYBIND11_TEST)
-  add_subdirectory(tests)
-endif()
-
-include(GNUInstallDirs)
-include(CMakePackageConfigHelpers)
-
-# extract project version from source
-file(STRINGS "${PYBIND11_INCLUDE_DIR}/pybind11/detail/common.h" pybind11_version_defines
-     REGEX "#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) ")
-foreach(ver ${pybind11_version_defines})
-  if (ver MATCHES "#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) +([^ ]+)$")
-    set(PYBIND11_VERSION_${CMAKE_MATCH_1} "${CMAKE_MATCH_2}" CACHE INTERNAL "")
+# Compare PYBIND11_HEADERS with a glob of the headers on disk and warn if mismatched
+if(PYBIND11_MASTER_PROJECT AND NOT CMAKE_VERSION VERSION_LESS 3.12)
+  file(
+    GLOB_RECURSE _pybind11_header_check
+    LIST_DIRECTORIES false
+    RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
+    CONFIGURE_DEPENDS "include/pybind11/*.h")
+  set(_pybind11_here_only ${PYBIND11_HEADERS})
+  set(_pybind11_disk_only ${_pybind11_header_check})
+  list(REMOVE_ITEM _pybind11_here_only ${_pybind11_header_check})
+  list(REMOVE_ITEM _pybind11_disk_only ${PYBIND11_HEADERS})
+  if(_pybind11_here_only)
+    message(AUTHOR_WARNING "PYBIND11_HEADERS has extra files:" ${_pybind11_here_only})
   endif()
-endforeach()
-set(${PROJECT_NAME}_VERSION ${PYBIND11_VERSION_MAJOR}.${PYBIND11_VERSION_MINOR}.${PYBIND11_VERSION_PATCH})
-message(STATUS "pybind11 v${${PROJECT_NAME}_VERSION}")
-
-option (USE_PYTHON_INCLUDE_DIR "Install pybind11 headers in Python include directory instead of default installation prefix" OFF)
-if (USE_PYTHON_INCLUDE_DIR)
-    file(RELATIVE_PATH CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX} ${PYTHON_INCLUDE_DIRS})
+  if(_pybind11_disk_only)
+    message(AUTHOR_WARNING "PYBIND11_HEADERS is missing files:" ${_pybind11_disk_only})
+  endif()
 endif()
 
-if(NOT (CMAKE_VERSION VERSION_LESS 3.0))  # CMake >= 3.0
-  # Build an interface library target:
-  add_library(pybind11 INTERFACE)
-  add_library(pybind11::pybind11 ALIAS pybind11)  # to match exported target
-  target_include_directories(pybind11 INTERFACE $<BUILD_INTERFACE:${PYBIND11_INCLUDE_DIR}>
-                                                $<BUILD_INTERFACE:${PYTHON_INCLUDE_DIRS}>
+# CMake 3.12 added list(TRANSFORM <list> PREPEND ...),
+# but we can't use it yet
+string(REPLACE "include/" "${CMAKE_CURRENT_SOURCE_DIR}/include/" PYBIND11_HEADERS
+               "${PYBIND11_HEADERS}")
+
+# Cache variable so this can be used in parent projects
+set(pybind11_INCLUDE_DIR
+    "${CMAKE_CURRENT_LIST_DIR}/include"
+    CACHE INTERNAL "Directory where pybind11 headers are located")
+
+# Backward compatible variable for add_subdirectory mode
+if(NOT PYBIND11_MASTER_PROJECT)
+  set(PYBIND11_INCLUDE_DIR
+      "${pybind11_INCLUDE_DIR}"
+      CACHE INTERNAL "")
+endif()
+
+# Note: when creating targets, you cannot use if statements at configure time -
+# you need generator expressions, because those will be placed in the target file.
+# You can also place ifs *in* the Config.in, but not here.
+
+# This section builds targets, but does *not* touch Python
+
+# Build the headers-only target (no Python included):
+# (long name used here to keep this from clashing in subdirectory mode)
+add_library(pybind11_headers INTERFACE)
+add_library(pybind11::pybind11_headers ALIAS pybind11_headers) # to match exported target
+add_library(pybind11::headers ALIAS pybind11_headers) # easier to use/remember
+
+include("${CMAKE_CURRENT_SOURCE_DIR}/tools/pybind11Common.cmake")
+
+# Relative directory setting: each branch expands the same variable that it tests with DEFINED
+if(USE_PYTHON_INCLUDE_DIR AND DEFINED Python_INCLUDE_DIRS)
+  file(RELATIVE_PATH CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX} ${Python_INCLUDE_DIRS})
+elseif(USE_PYTHON_INCLUDE_DIR AND DEFINED PYTHON_INCLUDE_DIR)
+  file(RELATIVE_PATH CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX} ${PYTHON_INCLUDE_DIR})
+endif()
+
+# Fill in headers target
+target_include_directories(
+  pybind11_headers ${pybind11_system} INTERFACE $<BUILD_INTERFACE:${pybind11_INCLUDE_DIR}>
                                                 $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
-  target_compile_options(pybind11 INTERFACE $<BUILD_INTERFACE:${PYBIND11_CPP_STANDARD}>)
 
-  add_library(module INTERFACE)
-  add_library(pybind11::module ALIAS module)
-  if(NOT MSVC)
-    target_compile_options(module INTERFACE -fvisibility=hidden)
-  endif()
-  target_link_libraries(module INTERFACE pybind11::pybind11)
-  if(WIN32 OR CYGWIN)
-    target_link_libraries(module INTERFACE $<BUILD_INTERFACE:${PYTHON_LIBRARIES}>)
-  elseif(APPLE)
-    target_link_libraries(module INTERFACE "-undefined dynamic_lookup")
+target_compile_features(pybind11_headers INTERFACE cxx_inheriting_constructors cxx_user_literals
+                                                   cxx_right_angle_brackets)
+
+if(PYBIND11_INSTALL)
+  install(DIRECTORY ${pybind11_INCLUDE_DIR}/pybind11 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+  set(PYBIND11_CMAKECONFIG_INSTALL_DIR
+      "${CMAKE_INSTALL_DATAROOTDIR}/cmake/${PROJECT_NAME}"
+      CACHE STRING "install path for pybind11Config.cmake")
+
+  configure_package_config_file(
+    tools/${PROJECT_NAME}Config.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
+    INSTALL_DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
+
+  if(CMAKE_VERSION VERSION_LESS 3.14)
+    # Remove CMAKE_SIZEOF_VOID_P from ConfigVersion.cmake since the library does
+    # not depend on architecture specific settings or libraries.
+    set(_PYBIND11_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P})
+    unset(CMAKE_SIZEOF_VOID_P)
+
+    write_basic_package_version_file(
+      ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
+      VERSION ${PROJECT_VERSION}
+      COMPATIBILITY AnyNewerVersion)
+
+    set(CMAKE_SIZEOF_VOID_P ${_PYBIND11_CMAKE_SIZEOF_VOID_P})
+  else()
+    # CMake 3.14+ natively supports header-only libraries
+    write_basic_package_version_file(
+      ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
+      VERSION ${PROJECT_VERSION}
+      COMPATIBILITY AnyNewerVersion ARCH_INDEPENDENT)
   endif()
 
-  add_library(embed INTERFACE)
-  add_library(pybind11::embed ALIAS embed)
-  target_link_libraries(embed INTERFACE pybind11::pybind11 $<BUILD_INTERFACE:${PYTHON_LIBRARIES}>)
+  install(
+    FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
+          ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
+          tools/FindPythonLibsNew.cmake
+          tools/pybind11Common.cmake
+          tools/pybind11Tools.cmake
+          tools/pybind11NewTools.cmake
+    DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
+
+  if(NOT PYBIND11_EXPORT_NAME)
+    set(PYBIND11_EXPORT_NAME "${PROJECT_NAME}Targets")
+  endif()
+
+  install(TARGETS pybind11_headers EXPORT "${PYBIND11_EXPORT_NAME}")
+
+  install(
+    EXPORT "${PYBIND11_EXPORT_NAME}"
+    NAMESPACE "pybind11::"
+    DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
+
+  # Uninstall target
+  if(PYBIND11_MASTER_PROJECT)
+    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tools/cmake_uninstall.cmake.in"
+                   "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" IMMEDIATE @ONLY)
+
+    add_custom_target(uninstall COMMAND ${CMAKE_COMMAND} -P
+                                        ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
+  endif()
 endif()
 
-if (PYBIND11_INSTALL)
-  install(DIRECTORY ${PYBIND11_INCLUDE_DIR}/pybind11 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
-  # GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".
-  set(PYBIND11_CMAKECONFIG_INSTALL_DIR "share/cmake/${PROJECT_NAME}" CACHE STRING "install path for pybind11Config.cmake")
-
-  configure_package_config_file(tools/${PROJECT_NAME}Config.cmake.in
-                                "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
-                                INSTALL_DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
-  # Remove CMAKE_SIZEOF_VOID_P from ConfigVersion.cmake since the library does
-  # not depend on architecture specific settings or libraries.
-  set(_PYBIND11_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P})
-  unset(CMAKE_SIZEOF_VOID_P)
-  write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
-                                   VERSION ${${PROJECT_NAME}_VERSION}
-                                   COMPATIBILITY AnyNewerVersion)
-  set(CMAKE_SIZEOF_VOID_P ${_PYBIND11_CMAKE_SIZEOF_VOID_P})
-  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
-                ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
-                tools/FindPythonLibsNew.cmake
-                tools/pybind11Tools.cmake
-          DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
-
-  if(NOT (CMAKE_VERSION VERSION_LESS 3.0))
-    if(NOT PYBIND11_EXPORT_NAME)
-      set(PYBIND11_EXPORT_NAME "${PROJECT_NAME}Targets")
-    endif()
-
-    install(TARGETS pybind11 module embed
-            EXPORT "${PYBIND11_EXPORT_NAME}")
-    if(PYBIND11_MASTER_PROJECT)
-      install(EXPORT "${PYBIND11_EXPORT_NAME}"
-              NAMESPACE "${PROJECT_NAME}::"
-              DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
+# BUILD_TESTING takes priority, but only if this is the master project
+if(PYBIND11_MASTER_PROJECT AND DEFINED BUILD_TESTING)
+  if(BUILD_TESTING)
+    if(_pybind11_nopython)
+      message(FATAL_ERROR "Cannot activate tests in NOPYTHON mode")
+    else()
+      add_subdirectory(tests)
     endif()
   endif()
+else()
+  if(PYBIND11_TEST)
+    if(_pybind11_nopython)
+      message(FATAL_ERROR "Cannot activate tests in NOPYTHON mode")
+    else()
+      add_subdirectory(tests)
+    endif()
+  endif()
+endif()
+
+# Better symmetry with find_package(pybind11 CONFIG) mode.
+if(NOT PYBIND11_MASTER_PROJECT)
+  set(pybind11_FOUND
+      TRUE
+      CACHE INTERNAL "True if pybind11 and all required components found on the system")
 endif()
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
deleted file mode 100644
index 01596d9..0000000
--- a/CONTRIBUTING.md
+++ /dev/null
@@ -1,49 +0,0 @@
-Thank you for your interest in this project! Please refer to the following
-sections on how to contribute code and bug reports.
-
-### Reporting bugs
-
-At the moment, this project is run in the spare time of a single person
-([Wenzel Jakob](http://rgl.epfl.ch/people/wjakob)) with very limited resources
-for issue tracker tickets. Thus, before submitting a question or bug report,
-please take a moment of your time and ensure that your issue isn't already
-discussed in the project documentation provided at
-[http://pybind11.readthedocs.org/en/latest](http://pybind11.readthedocs.org/en/latest).
-
-Assuming that you have identified a previously unknown problem or an important
-question, it's essential that you submit a self-contained and minimal piece of
-code that reproduces the problem. In other words: no external dependencies,
-isolate the function(s) that cause breakage, submit matched and complete C++
-and Python snippets that can be easily compiled and run on my end.
-
-## Pull requests
-Contributions are submitted, reviewed, and accepted using Github pull requests.
-Please refer to [this
-article](https://help.github.com/articles/using-pull-requests) for details and
-adhere to the following rules to make the process as smooth as possible:
-
-* Make a new branch for every feature you're working on.
-* Make small and clean pull requests that are easy to review but make sure they
-  do add value by themselves.
-* Add tests for any new functionality and run the test suite (``make pytest``)
-  to ensure that no existing features break.
-* Please run ``flake8`` and ``tools/check-style.sh`` to check your code matches
-  the project style. (Note that ``check-style.sh`` requires ``gawk``.)
-* This project has a strong focus on providing general solutions using a
-  minimal amount of code, thus small pull requests are greatly preferred.
-
-### Licensing of contributions
-
-pybind11 is provided under a BSD-style license that can be found in the
-``LICENSE`` file. By using, distributing, or contributing to this project, you
-agree to the terms and conditions of this license.
-
-You are under no obligation whatsoever to provide any bug fixes, patches, or
-upgrades to the features, functionality or performance of the source code
-("Enhancements") to anyone; however, if you choose to make your Enhancements
-available either publicly, or directly to the author of this software, without
-imposing a separate written license agreement for such Enhancements, then you
-hereby grant the following license: a non-exclusive, royalty-free perpetual
-license to install, use, modify, prepare derivative works, incorporate into
-other computer software, distribute, and sublicense such enhancements or
-derivative works thereof, in binary and source code form.
diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md
deleted file mode 100644
index 75df399..0000000
--- a/ISSUE_TEMPLATE.md
+++ /dev/null
@@ -1,17 +0,0 @@
-Make sure you've completed the following steps before submitting your issue -- thank you!
-
-1. Check if your question has already been answered in the [FAQ](http://pybind11.readthedocs.io/en/latest/faq.html) section.
-2. Make sure you've read the [documentation](http://pybind11.readthedocs.io/en/latest/). Your issue may be addressed there.
-3. If those resources didn't help and you only have a short question (not a bug report), consider asking in the [Gitter chat room](https://gitter.im/pybind/Lobby).
-4. If you have a genuine bug report or a more complex question which is not answered in the previous items (or not suitable for chat), please fill in the details below.
-5. Include a self-contained and minimal piece of code that reproduces the problem. If that's not possible, try to make the description as clear as possible.
-
-*After reading, remove this checklist and the template text in parentheses below.*
-
-## Issue description
-
-(Provide a short description, state the expected behavior and what actually happens.)
-
-## Reproducible example code
-
-(The code should be minimal, have no external dependencies, isolate the function(s) that cause breakage. Submit matched and complete C++ and Python snippets that can be easily compiled and run to diagnose the issue.)
diff --git a/LICENSE b/LICENSE
index 6f15578..e466b0d 100644
--- a/LICENSE
+++ b/LICENSE
@@ -25,5 +25,5 @@
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-Please also refer to the file CONTRIBUTING.md, which clarifies licensing of
+Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of
 external contributions to this project including patches, pull requests, etc.
diff --git a/MANIFEST.in b/MANIFEST.in
index 6e57bae..aed183e 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,6 @@
-recursive-include include/pybind11 *.h
-include LICENSE README.md CONTRIBUTING.md
+recursive-include pybind11/include/pybind11 *.h
+recursive-include pybind11 *.py
+recursive-include pybind11 py.typed
+recursive-include pybind11 *.pyi
+include pybind11/share/cmake/pybind11/*.cmake
+include LICENSE README.rst pyproject.toml setup.py setup.cfg
diff --git a/METADATA b/METADATA
index bbcbe08..05c3ffb 100644
--- a/METADATA
+++ b/METADATA
@@ -9,11 +9,11 @@
     type: GIT
     value: "https://github.com/pybind/pybind11.git"
   }
-  version: "v2.5.0"
+  version: "v2.6.1"
   license_type: NOTICE
   last_upgrade_date {
-    year: 2020
-    month: 3
-    day: 31
+    year: 2021
+    month: 1
+    day: 13
   }
 }
diff --git a/README.md b/README.md
deleted file mode 100644
index 35d2d76..0000000
--- a/README.md
+++ /dev/null
@@ -1,129 +0,0 @@
-![pybind11 logo](https://github.com/pybind/pybind11/raw/master/docs/pybind11-logo.png)
-
-# pybind11 — Seamless operability between C++11 and Python
-
-[![Documentation Status](https://readthedocs.org/projects/pybind11/badge/?version=master)](http://pybind11.readthedocs.org/en/master/?badge=master)
-[![Documentation Status](https://readthedocs.org/projects/pybind11/badge/?version=stable)](http://pybind11.readthedocs.org/en/stable/?badge=stable)
-[![Gitter chat](https://img.shields.io/gitter/room/gitterHQ/gitter.svg)](https://gitter.im/pybind/Lobby)
-[![Build Status](https://travis-ci.org/pybind/pybind11.svg?branch=master)](https://travis-ci.org/pybind/pybind11)
-[![Build status](https://ci.appveyor.com/api/projects/status/riaj54pn4h08xy40?svg=true)](https://ci.appveyor.com/project/wjakob/pybind11)
-
-**pybind11** is a lightweight header-only library that exposes C++ types in Python
-and vice versa, mainly to create Python bindings of existing C++ code. Its
-goals and syntax are similar to the excellent
-[Boost.Python](http://www.boost.org/doc/libs/1_58_0/libs/python/doc/) library
-by David Abrahams: to minimize boilerplate code in traditional extension
-modules by inferring type information using compile-time introspection.
-
-The main issue with Boost.Python—and the reason for creating such a similar
-project—is Boost. Boost is an enormously large and complex suite of utility
-libraries that works with almost every C++ compiler in existence. This
-compatibility has its cost: arcane template tricks and workarounds are
-necessary to support the oldest and buggiest of compiler specimens. Now that
-C++11-compatible compilers are widely available, this heavy machinery has
-become an excessively large and unnecessary dependency.
-
-Think of this library as a tiny self-contained version of Boost.Python with
-everything stripped away that isn't relevant for binding generation. Without
-comments, the core header files only require ~4K lines of code and depend on
-Python (2.7 or 3.x, or PyPy2.7 >= 5.7) and the C++ standard library. This
-compact implementation was possible thanks to some of the new C++11 language
-features (specifically: tuples, lambda functions and variadic templates). Since
-its creation, this library has grown beyond Boost.Python in many ways, leading
-to dramatically simpler binding code in many common situations.
-
-Tutorial and reference documentation is provided at
-[http://pybind11.readthedocs.org/en/master](http://pybind11.readthedocs.org/en/master).
-A PDF version of the manual is available
-[here](https://media.readthedocs.org/pdf/pybind11/master/pybind11.pdf).
-
-## Core features
-pybind11 can map the following core C++ features to Python
-
-- Functions accepting and returning custom data structures per value, reference, or pointer
-- Instance methods and static methods
-- Overloaded functions
-- Instance attributes and static attributes
-- Arbitrary exception types
-- Enumerations
-- Callbacks
-- Iterators and ranges
-- Custom operators
-- Single and multiple inheritance
-- STL data structures
-- Smart pointers with reference counting like ``std::shared_ptr``
-- Internal references with correct reference counting
-- C++ classes with virtual (and pure virtual) methods can be extended in Python
-
-## Goodies
-In addition to the core functionality, pybind11 provides some extra goodies:
-
-- Python 2.7, 3.x, and PyPy (PyPy2.7 >= 5.7) are supported with an
-  implementation-agnostic interface.
-
-- It is possible to bind C++11 lambda functions with captured variables. The
-  lambda capture data is stored inside the resulting Python function object.
-
-- pybind11 uses C++11 move constructors and move assignment operators whenever
-  possible to efficiently transfer custom data types.
-
-- It's easy to expose the internal storage of custom data types through
-  Pythons' buffer protocols. This is handy e.g. for fast conversion between
-  C++ matrix classes like Eigen and NumPy without expensive copy operations.
-
-- pybind11 can automatically vectorize functions so that they are transparently
-  applied to all entries of one or more NumPy array arguments.
-
-- Python's slice-based access and assignment operations can be supported with
-  just a few lines of code.
-
-- Everything is contained in just a few header files; there is no need to link
-  against any additional libraries.
-
-- Binaries are generally smaller by a factor of at least 2 compared to
-  equivalent bindings generated by Boost.Python. A recent pybind11 conversion
-  of PyRosetta, an enormous Boost.Python binding project,
-  [reported](http://graylab.jhu.edu/RosettaCon2016/PyRosetta-4.pdf) a binary
-  size reduction of **5.4x** and compile time reduction by **5.8x**.
-
-- Function signatures are precomputed at compile time (using ``constexpr``),
-  leading to smaller binaries.
-
-- With little extra effort, C++ types can be pickled and unpickled similar to
-  regular Python objects.
-
-## Supported compilers
-
-1. Clang/LLVM 3.3 or newer (for Apple Xcode's clang, this is 5.0.0 or newer)
-2. GCC 4.8 or newer
-3. Microsoft Visual Studio 2015 Update 3 or newer
-4. Intel C++ compiler 17 or newer (16 with pybind11 v2.0 and 15 with pybind11 v2.0 and a [workaround](https://github.com/pybind/pybind11/issues/276))
-5. Cygwin/GCC (tested on 2.5.1)
-
-## About
-
-This project was created by [Wenzel Jakob](http://rgl.epfl.ch/people/wjakob).
-Significant features and/or improvements to the code were contributed by
-Jonas Adler,
-Lori A. Burns,
-Sylvain Corlay,
-Trent Houliston,
-Axel Huebl,
-@hulucc,
-Sergey Lyskov
-Johan Mabille,
-Tomasz Miąsko,
-Dean Moldovan,
-Ben Pritchard,
-Jason Rhinelander,
-Boris Schäling,
-Pim Schellart,
-Henry Schreiner,
-Ivan Smirnov, and
-Patrick Stewart.
-
-### License
-
-pybind11 is provided under a BSD-style license that can be found in the
-``LICENSE`` file. By using, distributing, or contributing to this project,
-you agree to the terms and conditions of this license.
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..1474cb9
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,175 @@
+.. figure:: https://github.com/pybind/pybind11/raw/master/docs/pybind11-logo.png
+   :alt: pybind11 logo
+
+**pybind11 — Seamless operability between C++11 and Python**
+
+|Latest Documentation Status| |Stable Documentation Status| |Gitter chat| |CI| |Build status|
+
+.. warning::
+
+   Combining older versions of pybind11 (< 2.6.0) with the brand-new Python
+   3.9.0 will trigger undefined behavior that typically manifests as crashes
+   during interpreter shutdown (but could also destroy your data). **You have
+   been warned.**
+
+   We recommend that you wait for Python 3.9.1 slated for release in December,
+   which will include a `fix <https://github.com/python/cpython/pull/22670>`_
+   that resolves this problem.  In the meantime, please update to the latest
+   version of pybind11 (2.6.0 or newer), which includes a temporary workaround
+   specifically when Python 3.9.0 is detected at runtime.
+
+**pybind11** is a lightweight header-only library that exposes C++ types
+in Python and vice versa, mainly to create Python bindings of existing
+C++ code. Its goals and syntax are similar to the excellent
+`Boost.Python <http://www.boost.org/doc/libs/1_58_0/libs/python/doc/>`_
+library by David Abrahams: to minimize boilerplate code in traditional
+extension modules by inferring type information using compile-time
+introspection.
+
+The main issue with Boost.Python—and the reason for creating such a
+similar project—is Boost. Boost is an enormously large and complex suite
+of utility libraries that works with almost every C++ compiler in
+existence. This compatibility has its cost: arcane template tricks and
+workarounds are necessary to support the oldest and buggiest of compiler
+specimens. Now that C++11-compatible compilers are widely available,
+this heavy machinery has become an excessively large and unnecessary
+dependency.
+
+Think of this library as a tiny self-contained version of Boost.Python
+with everything stripped away that isn’t relevant for binding
+generation. Without comments, the core header files only require ~4K
+lines of code and depend on Python (2.7 or 3.5+, or PyPy) and the C++
+standard library. This compact implementation was possible thanks to
+some of the new C++11 language features (specifically: tuples, lambda
+functions and variadic templates). Since its creation, this library has
+grown beyond Boost.Python in many ways, leading to dramatically simpler
+binding code in many common situations.
+
+Tutorial and reference documentation is provided at
+`pybind11.readthedocs.io <https://pybind11.readthedocs.io/en/latest>`_.
+A PDF version of the manual is available
+`here <https://pybind11.readthedocs.io/_/downloads/en/latest/pdf/>`_.
+And the source code is always available at
+`github.com/pybind/pybind11 <https://github.com/pybind/pybind11>`_.
+
+
+Core features
+-------------
+
+
+pybind11 can map the following core C++ features to Python:
+
+- Functions accepting and returning custom data structures per value,
+  reference, or pointer
+- Instance methods and static methods
+- Overloaded functions
+- Instance attributes and static attributes
+- Arbitrary exception types
+- Enumerations
+- Callbacks
+- Iterators and ranges
+- Custom operators
+- Single and multiple inheritance
+- STL data structures
+- Smart pointers with reference counting like ``std::shared_ptr``
+- Internal references with correct reference counting
+- C++ classes with virtual (and pure virtual) methods can be extended
+  in Python
+
+Goodies
+-------
+
+In addition to the core functionality, pybind11 provides some extra
+goodies:
+
+- Python 2.7, 3.5+, and PyPy/PyPy3 7.3 are supported with an
+  implementation-agnostic interface.
+
+- It is possible to bind C++11 lambda functions with captured
+  variables. The lambda capture data is stored inside the resulting
+  Python function object.
+
+- pybind11 uses C++11 move constructors and move assignment operators
+  whenever possible to efficiently transfer custom data types.
+
+- It’s easy to expose the internal storage of custom data types through
+  Python’s buffer protocol. This is handy e.g. for fast conversion
+  between C++ matrix classes like Eigen and NumPy without expensive
+  copy operations.
+
+- pybind11 can automatically vectorize functions so that they are
+  transparently applied to all entries of one or more NumPy array
+  arguments.
+
+- Python’s slice-based access and assignment operations can be
+  supported with just a few lines of code.
+
+- Everything is contained in just a few header files; there is no need
+  to link against any additional libraries.
+
+- Binaries are generally smaller by a factor of at least 2 compared to
+  equivalent bindings generated by Boost.Python. A recent pybind11
+  conversion of PyRosetta, an enormous Boost.Python binding project,
+  `reported <http://graylab.jhu.edu/RosettaCon2016/PyRosetta-4.pdf>`_
+  a binary size reduction of **5.4x** and compile time reduction by
+  **5.8x**.
+
+- Function signatures are precomputed at compile time (using
+  ``constexpr``), leading to smaller binaries.
+
+- With little extra effort, C++ types can be pickled and unpickled
+  similar to regular Python objects.
+
+Supported compilers
+-------------------
+
+1. Clang/LLVM 3.3 or newer (for Apple Xcode’s clang, this is 5.0.0 or
+   newer)
+2. GCC 4.8 or newer
+3. Microsoft Visual Studio 2015 Update 3 or newer
+4. Intel C++ compiler 18 or newer
+   (`possible issue <https://github.com/pybind/pybind11/pull/2573>`_ on 20.2)
+5. Cygwin/GCC (tested on 2.5.1)
+6. NVCC (CUDA 11.0 tested)
+7. NVIDIA PGI (20.7 and 20.9 tested)
+
+About
+-----
+
+This project was created by `Wenzel
+Jakob <http://rgl.epfl.ch/people/wjakob>`_. Significant features and/or
+improvements to the code were contributed by Jonas Adler, Lori A. Burns,
+Sylvain Corlay, Eric Cousineau, Ralf Grosse-Kunstleve, Trent Houliston, Axel
+Huebl, @hulucc, Yannick Jadoul, Sergey Lyskov, Johan Mabille, Tomasz Miąsko,
+Dean Moldovan, Ben Pritchard, Jason Rhinelander, Boris Schäling, Pim
+Schellart, Henry Schreiner, Ivan Smirnov, Boris Staletic, and Patrick Stewart.
+
+We thank Google for a generous financial contribution to the continuous
+integration infrastructure used by this project.
+
+
+Contributing
+~~~~~~~~~~~~
+
+See the `contributing
+guide <https://github.com/pybind/pybind11/blob/master/.github/CONTRIBUTING.md>`_
+for information on building and contributing to pybind11.
+
+License
+~~~~~~~
+
+pybind11 is provided under a BSD-style license that can be found in the
+`LICENSE <https://github.com/pybind/pybind11/blob/master/LICENSE>`_
+file. By using, distributing, or contributing to this project, you agree
+to the terms and conditions of this license.
+
+.. |Latest Documentation Status| image:: https://readthedocs.org/projects/pybind11/badge?version=latest
+   :target: http://pybind11.readthedocs.org/en/latest
+.. |Stable Documentation Status| image:: https://img.shields.io/badge/docs-stable-blue
+   :target: http://pybind11.readthedocs.org/en/stable
+.. |Gitter chat| image:: https://img.shields.io/gitter/room/gitterHQ/gitter.svg
+   :target: https://gitter.im/pybind/Lobby
+.. |CI| image:: https://github.com/pybind/pybind11/workflows/CI/badge.svg
+   :target: https://github.com/pybind/pybind11/actions
+.. |Build status| image:: https://ci.appveyor.com/api/projects/status/riaj54pn4h08xy40?svg=true
+   :target: https://ci.appveyor.com/project/wjakob/pybind11
diff --git a/docs/Doxyfile b/docs/Doxyfile
index 1b9d129..c856295 100644
--- a/docs/Doxyfile
+++ b/docs/Doxyfile
@@ -18,3 +18,6 @@
 QUIET                  = YES
 WARNINGS               = YES
 WARN_IF_UNDOCUMENTED   = NO
+PREDEFINED             = DOXYGEN_SHOULD_SKIP_THIS \
+                         PY_MAJOR_VERSION=3 \
+                         PYBIND11_NOINLINE
diff --git a/docs/advanced/cast/custom.rst b/docs/advanced/cast/custom.rst
index e4f99ac..a779444 100644
--- a/docs/advanced/cast/custom.rst
+++ b/docs/advanced/cast/custom.rst
@@ -29,9 +29,9 @@
     from example import print
     print(A())
 
-To register the necessary conversion routines, it is necessary to add
-a partial overload to the ``pybind11::detail::type_caster<T>`` template.
-Although this is an implementation detail, adding partial overloads to this
+To register the necessary conversion routines, it is necessary to add an
+instantiation of the ``pybind11::detail::type_caster<T>`` template.
+Although this is an implementation detail, adding an instantiation of this
 type is explicitly allowed.
 
 .. code-block:: cpp
diff --git a/docs/advanced/cast/eigen.rst b/docs/advanced/cast/eigen.rst
index 59ba08c..e01472d 100644
--- a/docs/advanced/cast/eigen.rst
+++ b/docs/advanced/cast/eigen.rst
@@ -274,7 +274,7 @@
 
 Eigen and numpy have fundamentally different notions of a vector.  In Eigen, a
 vector is simply a matrix with the number of columns or rows set to 1 at
-compile time (for a column vector or row vector, respectively).  Numpy, in
+compile time (for a column vector or row vector, respectively).  NumPy, in
 contrast, has comparable 2-dimensional 1xN and Nx1 arrays, but *also* has
 1-dimensional arrays of size N.
 
diff --git a/docs/advanced/cast/index.rst b/docs/advanced/cast/index.rst
index 54c1057..3ce9ea0 100644
--- a/docs/advanced/cast/index.rst
+++ b/docs/advanced/cast/index.rst
@@ -1,3 +1,5 @@
+.. _type-conversions:
+
 Type conversions
 ################
 
@@ -39,4 +41,3 @@
    chrono
    eigen
    custom
-
diff --git a/docs/advanced/cast/stl.rst b/docs/advanced/cast/stl.rst
index e48409f..70fde0d 100644
--- a/docs/advanced/cast/stl.rst
+++ b/docs/advanced/cast/stl.rst
@@ -5,7 +5,7 @@
 ====================
 
 When including the additional header file :file:`pybind11/stl.h`, conversions
-between ``std::vector<>``/``std::deque<>``/``std::list<>``/``std::array<>``,
+between ``std::vector<>``/``std::deque<>``/``std::list<>``/``std::array<>``/``std::valarray<>``,
 ``std::set<>``/``std::unordered_set<>``, and
 ``std::map<>``/``std::unordered_map<>`` and the Python ``list``, ``set`` and
 ``dict`` data structures are automatically enabled. The types ``std::pair<>``
@@ -157,7 +157,7 @@
 
 before any binding code (e.g. invocations to ``class_::def()``, etc.). This
 macro must be specified at the top level (and outside of any namespaces), since
-it instantiates a partial template overload. If your binding code consists of
+it adds a template instantiation of ``type_caster``. If your binding code consists of
 multiple compilation units, it must be present in every file (typically via a
 common header) preceding any usage of ``std::vector<int>``. Opaque types must
 also have a corresponding ``class_`` declaration to associate them with a name
diff --git a/docs/advanced/classes.rst b/docs/advanced/classes.rst
index ae5907d..4927902 100644
--- a/docs/advanced/classes.rst
+++ b/docs/advanced/classes.rst
@@ -71,7 +71,7 @@
 
         /* Trampoline (need one for each virtual function) */
         std::string go(int n_times) override {
-            PYBIND11_OVERLOAD_PURE(
+            PYBIND11_OVERRIDE_PURE(
                 std::string, /* Return type */
                 Animal,      /* Parent class */
                 go,          /* Name of function in C++ (must match Python name) */
@@ -80,10 +80,10 @@
         }
     };
 
-The macro :c:macro:`PYBIND11_OVERLOAD_PURE` should be used for pure virtual
-functions, and :c:macro:`PYBIND11_OVERLOAD` should be used for functions which have
-a default implementation.  There are also two alternate macros 
-:c:macro:`PYBIND11_OVERLOAD_PURE_NAME` and :c:macro:`PYBIND11_OVERLOAD_NAME` which
+The macro :c:macro:`PYBIND11_OVERRIDE_PURE` should be used for pure virtual
+functions, and :c:macro:`PYBIND11_OVERRIDE` should be used for functions which have
+a default implementation.  There are also two alternate macros
+:c:macro:`PYBIND11_OVERRIDE_PURE_NAME` and :c:macro:`PYBIND11_OVERRIDE_NAME` which
 take a string-valued name argument between the *Parent class* and *Name of the
 function* slots, which defines the name of function in Python. This is required
 when the C++ and Python versions of the
@@ -122,7 +122,7 @@
 
 Note, however, that the above is sufficient for allowing python classes to
 extend ``Animal``, but not ``Dog``: see :ref:`virtual_and_inheritance` for the
-necessary steps required to providing proper overload support for inherited
+necessary steps required to provide proper overriding support for inherited
 classes.
 
 The Python session below shows how to override ``Animal::go`` and invoke it via
@@ -149,13 +149,17 @@
 will generally leave the C++ instance in an invalid state and cause undefined
 behavior if the C++ instance is subsequently used.
 
+.. versionchanged:: 2.6
+   The default pybind11 metaclass will throw a ``TypeError`` when it detects
+   that ``__init__`` was not called by a derived class.
+
 Here is an example:
 
 .. code-block:: python
 
     class Dachshund(Dog):
         def __init__(self, name):
-            Dog.__init__(self) # Without this, undefined behavior may occur if the C++ portions are referenced.
+            Dog.__init__(self) # Without this, a TypeError is raised.
             self.name = name
         def bark(self):
             return "yap!"
@@ -177,15 +181,24 @@
 
     - because in these cases there is no C++ variable to reference (the value
       is stored in the referenced Python variable), pybind11 provides one in
-      the PYBIND11_OVERLOAD macros (when needed) with static storage duration.
-      Note that this means that invoking the overloaded method on *any*
+      the PYBIND11_OVERRIDE macros (when needed) with static storage duration.
+      Note that this means that invoking the overridden method on *any*
       instance will change the referenced value stored in *all* instances of
       that type.
 
     - Attempts to modify a non-const reference will not have the desired
       effect: it will change only the static cache variable, but this change
       will not propagate to underlying Python instance, and the change will be
-      replaced the next time the overload is invoked.
+      replaced the next time the override is invoked.
+
+.. warning::
+
+    The :c:macro:`PYBIND11_OVERRIDE` and accompanying macros used to be called
+    ``PYBIND11_OVERLOAD`` up until pybind11 v2.5.0, and :func:`get_override`
+    used to be called ``get_overload``. This naming was corrected and the older
+    macro and function names may soon be deprecated, in order to reduce
+    confusion with overloaded functions and methods and ``py::overload_cast``
+    (see :ref:`classes`).
 
 .. seealso::
 
@@ -233,20 +246,20 @@
     class PyAnimal : public Animal {
     public:
         using Animal::Animal; // Inherit constructors
-        std::string go(int n_times) override { PYBIND11_OVERLOAD_PURE(std::string, Animal, go, n_times); }
-        std::string name() override { PYBIND11_OVERLOAD(std::string, Animal, name, ); }
+        std::string go(int n_times) override { PYBIND11_OVERRIDE_PURE(std::string, Animal, go, n_times); }
+        std::string name() override { PYBIND11_OVERRIDE(std::string, Animal, name, ); }
     };
     class PyDog : public Dog {
     public:
         using Dog::Dog; // Inherit constructors
-        std::string go(int n_times) override { PYBIND11_OVERLOAD(std::string, Dog, go, n_times); }
-        std::string name() override { PYBIND11_OVERLOAD(std::string, Dog, name, ); }
-        std::string bark() override { PYBIND11_OVERLOAD(std::string, Dog, bark, ); }
+        std::string go(int n_times) override { PYBIND11_OVERRIDE(std::string, Dog, go, n_times); }
+        std::string name() override { PYBIND11_OVERRIDE(std::string, Dog, name, ); }
+        std::string bark() override { PYBIND11_OVERRIDE(std::string, Dog, bark, ); }
     };
 
 .. note::
 
-    Note the trailing commas in the ``PYBIND11_OVERLOAD`` calls to ``name()``
+    Note the trailing commas in the ``PYBIND11_OVERRIDE`` calls to ``name()``
     and ``bark()``. These are needed to portably implement a trampoline for a
     function that does not take any arguments. For functions that take
     a nonzero number of arguments, the trailing comma must be omitted.
@@ -261,9 +274,9 @@
     class PyHusky : public Husky {
     public:
         using Husky::Husky; // Inherit constructors
-        std::string go(int n_times) override { PYBIND11_OVERLOAD_PURE(std::string, Husky, go, n_times); }
-        std::string name() override { PYBIND11_OVERLOAD(std::string, Husky, name, ); }
-        std::string bark() override { PYBIND11_OVERLOAD(std::string, Husky, bark, ); }
+        std::string go(int n_times) override { PYBIND11_OVERRIDE_PURE(std::string, Husky, go, n_times); }
+        std::string name() override { PYBIND11_OVERRIDE(std::string, Husky, name, ); }
+        std::string bark() override { PYBIND11_OVERRIDE(std::string, Husky, bark, ); }
     };
 
 There is, however, a technique that can be used to avoid this duplication
@@ -276,15 +289,15 @@
     template <class AnimalBase = Animal> class PyAnimal : public AnimalBase {
     public:
         using AnimalBase::AnimalBase; // Inherit constructors
-        std::string go(int n_times) override { PYBIND11_OVERLOAD_PURE(std::string, AnimalBase, go, n_times); }
-        std::string name() override { PYBIND11_OVERLOAD(std::string, AnimalBase, name, ); }
+        std::string go(int n_times) override { PYBIND11_OVERRIDE_PURE(std::string, AnimalBase, go, n_times); }
+        std::string name() override { PYBIND11_OVERRIDE(std::string, AnimalBase, name, ); }
     };
     template <class DogBase = Dog> class PyDog : public PyAnimal<DogBase> {
     public:
         using PyAnimal<DogBase>::PyAnimal; // Inherit constructors
         // Override PyAnimal's pure virtual go() with a non-pure one:
-        std::string go(int n_times) override { PYBIND11_OVERLOAD(std::string, DogBase, go, n_times); }
-        std::string bark() override { PYBIND11_OVERLOAD(std::string, DogBase, bark, ); }
+        std::string go(int n_times) override { PYBIND11_OVERRIDE(std::string, DogBase, go, n_times); }
+        std::string bark() override { PYBIND11_OVERRIDE(std::string, DogBase, bark, ); }
     };
 
 This technique has the advantage of requiring just one trampoline method to be
@@ -298,8 +311,8 @@
 .. code-block:: cpp
 
     py::class_<Animal, PyAnimal<>> animal(m, "Animal");
-    py::class_<Dog, PyDog<>> dog(m, "Dog");
-    py::class_<Husky, PyDog<Husky>> husky(m, "Husky");
+    py::class_<Dog, Animal, PyDog<>> dog(m, "Dog");
+    py::class_<Husky, Dog, PyDog<Husky>> husky(m, "Husky");
     // ... add animal, dog, husky definitions
 
 Note that ``Husky`` did not require a dedicated trampoline template class at
@@ -337,7 +350,7 @@
 for performance reasons: when the trampoline class is not needed for anything
 except virtual method dispatching, not initializing the trampoline class
 improves performance by avoiding needing to do a run-time check to see if the
-inheriting python instance has an overloaded method.
+inheriting python instance has an overridden method.
 
 Sometimes, however, it is useful to always initialize a trampoline class as an
 intermediate class that does more than just handle virtual method dispatching.
@@ -368,7 +381,7 @@
 this is to use the method body of the trampoline class to do conversions to the
 input and return of the Python method.
 
-The main building block to do so is the :func:`get_overload`, this function
+The main building block to do so is the :func:`get_override`, this function
 allows retrieving a method implemented in Python from within the trampoline's
 methods. Consider for example a C++ method which has the signature
 ``bool myMethod(int32_t& value)``, where the return indicates whether
@@ -380,10 +393,10 @@
     bool MyClass::myMethod(int32_t& value)
     {
         pybind11::gil_scoped_acquire gil;  // Acquire the GIL while in this scope.
-        // Try to look up the overloaded method on the Python side.
-        pybind11::function overload = pybind11::get_overload(this, "myMethod");
-        if (overload) {  // method is found
-            auto obj = overload(value);  // Call the Python function.
+        // Try to look up the overridden method on the Python side.
+        pybind11::function override = pybind11::get_override(this, "myMethod");
+        if (override) {  // method is found
+            auto obj = override(value);  // Call the Python function.
             if (py::isinstance<py::int_>(obj)) {  // check if it returned a Python integer type
                 value = obj.cast<int32_t>();  // Cast it and assign it to the value.
                 return true;  // Return true; value should be used.
@@ -554,6 +567,46 @@
     py::class_<MyClass, std::unique_ptr<MyClass, py::nodelete>>(m, "MyClass")
         .def(py::init<>())
 
+.. _destructors_that_call_python:
+
+Destructors that call Python
+============================
+
+If a Python function is invoked from a C++ destructor, an exception may be thrown
+of type :class:`error_already_set`. If this error is thrown out of a class destructor,
+``std::terminate()`` will be called, terminating the process. Class destructors
+must catch all exceptions of type :class:`error_already_set` to discard the Python
+exception using :func:`error_already_set::discard_as_unraisable`.
+
+Every Python function should be treated as *possibly throwing*. When a Python generator
+stops yielding items, Python will throw a ``StopIteration`` exception, which can pass
+through C++ destructors if the generator's stack frame holds the last reference to C++
+objects.
+
+For more information, see :ref:`the documentation on exceptions <unraisable_exceptions>`.
+
+.. code-block:: cpp
+
+    class MyClass {
+    public:
+        ~MyClass() {
+            try {
+                py::print("Even printing is dangerous in a destructor");
+                py::exec("raise ValueError('This is an unraisable exception')");
+            } catch (py::error_already_set &e) {
+                // error_context should be information about where/why the error occurred,
+                // e.g. use __func__ to get the name of the current function
+                e.discard_as_unraisable(__func__);
+            }
+        }
+    };
+
+.. note::
+
+    pybind11 does not support C++ destructors marked ``noexcept(false)``.
+
+.. versionadded:: 2.6
+
 .. _implicit_conversions:
 
 Implicit conversions
@@ -768,13 +821,17 @@
     p.setExtra(15)
     data = pickle.dumps(p, 2)
 
-Note that only the cPickle module is supported on Python 2.7. The second
-argument to ``dumps`` is also crucial: it selects the pickle protocol version
-2, since the older version 1 is not supported. Newer versions are also fine—for
-instance, specify ``-1`` to always use the latest available version. Beware:
-failure to follow these instructions will cause important pybind11 memory
-allocation routines to be skipped during unpickling, which will likely lead to
-memory corruption and/or segmentation faults.
+
+.. note::
+    Note that only the cPickle module is supported on Python 2.7.
+
+    The second argument to ``dumps`` is also crucial: it selects the pickle
+    protocol version 2, since the older version 1 is not supported. Newer
+    versions are also fine—for instance, specify ``-1`` to always use the
+    latest available version. Beware: failure to follow these instructions
+    will cause important pybind11 memory allocation routines to be skipped
+    during unpickling, which will likely lead to memory corruption and/or
+    segmentation faults.
 
 .. seealso::
 
@@ -784,6 +841,38 @@
 
 .. [#f3] http://docs.python.org/3/library/pickle.html#pickling-class-instances
 
+Deepcopy support
+================
+
+Python normally uses references in assignments. Sometimes a real copy is needed
+to prevent changing all copies. The ``copy`` module [#f5]_ provides these
+capabilities.
+
+On Python 3, a class with pickle support is automatically also (deep)copy
+compatible. However, performance can be improved by adding custom
+``__copy__`` and ``__deepcopy__`` methods. With Python 2.7, these custom methods
+are mandatory for (deep)copy compatibility, because pybind11 only supports
+cPickle.
+
+For simple classes (deep)copy can be enabled by using the copy constructor,
+which should look as follows:
+
+.. code-block:: cpp
+
+    py::class_<Copyable>(m, "Copyable")
+        .def("__copy__",  [](const Copyable &self) {
+            return Copyable(self);
+        })
+        .def("__deepcopy__", [](const Copyable &self, py::dict) {
+            return Copyable(self);
+        }, "memo"_a);
+
+.. note::
+
+    Dynamic attributes will not be copied in this example.
+
+.. [#f5] https://docs.python.org/3/library/copy.html
+
 Multiple Inheritance
 ====================
 
@@ -1024,7 +1113,7 @@
 
     class Trampoline : public A {
     public:
-        int foo() const override { PYBIND11_OVERLOAD(int, A, foo, ); }
+        int foo() const override { PYBIND11_OVERRIDE(int, A, foo, ); }
     };
 
     class Publicist : public A {
@@ -1042,6 +1131,34 @@
     ``.def("foo", static_cast<int (A::*)() const>(&Publicist::foo));``
     where ``int (A::*)() const`` is the type of ``A::foo``.
 
+Binding final classes
+=====================
+
+Some classes may not be appropriate to inherit from. In C++11, classes can
+use the ``final`` specifier to ensure that a class cannot be inherited from.
+The ``py::is_final`` attribute can be used to ensure that Python classes
+cannot inherit from a specified type. The underlying C++ type does not need
+to be declared final.
+
+.. code-block:: cpp
+
+    class IsFinal final {};
+
+    py::class_<IsFinal>(m, "IsFinal", py::is_final());
+
+When you try to inherit from such a class in Python, you will now get this
+error:
+
+.. code-block:: pycon
+
+    >>> class PyFinalChild(IsFinal):
+    ...     pass
+    TypeError: type 'IsFinal' is not an acceptable base type
+
+.. note:: This attribute is currently ignored on PyPy
+
+.. versionadded:: 2.6
+
 Custom automatic downcasters
 ============================
 
@@ -1124,3 +1241,21 @@
     more complete example, including a demonstration of how to provide
     automatic downcasting for an entire class hierarchy without
     writing one get() function for each class.
+
+Accessing the type object
+=========================
+
+You can get the type object from a C++ class that has already been registered using:
+
+.. code-block:: cpp
+
+    py::type T_py = py::type::of<T>();
+
+You can directly use ``py::type::of(ob)`` to get the type object from any python
+object, just like ``type(ob)`` in Python.
+
+.. note::
+
+    Other types, like ``py::type::of<int>()``, do not work, see :ref:`type-conversions`.
+
+.. versionadded:: 2.6
diff --git a/docs/advanced/embedding.rst b/docs/advanced/embedding.rst
index 3930316..dfdaad2 100644
--- a/docs/advanced/embedding.rst
+++ b/docs/advanced/embedding.rst
@@ -18,7 +18,7 @@
 
 .. code-block:: cmake
 
-    cmake_minimum_required(VERSION 3.0)
+    cmake_minimum_required(VERSION 3.4)
     project(example)
 
     find_package(pybind11 REQUIRED)  # or `add_subdirectory(pybind11)`
@@ -108,11 +108,11 @@
 Importing modules
 =================
 
-Python modules can be imported using `module::import()`:
+Python modules can be imported using `module_::import()`:
 
 .. code-block:: cpp
 
-    py::module sys = py::module::import("sys");
+    py::module_ sys = py::module_::import("sys");
     py::print(sys.attr("path"));
 
 For convenience, the current working directory is included in ``sys.path`` when
@@ -128,12 +128,12 @@
 
 .. code-block:: cpp
 
-    py::module calc = py::module::import("calc");
+    py::module_ calc = py::module_::import("calc");
     py::object result = calc.attr("add")(1, 2);
     int n = result.cast<int>();
     assert(n == 3);
 
-Modules can be reloaded using `module::reload()` if the source is modified e.g.
+Modules can be reloaded using `module_::reload()` if the source is modified e.g.
 by an external process. This can be useful in scenarios where the application
 imports a user defined data processing script which needs to be updated after
 changes by the user. Note that this function does not reload modules recursively.
@@ -153,7 +153,7 @@
     namespace py = pybind11;
 
     PYBIND11_EMBEDDED_MODULE(fast_calc, m) {
-        // `m` is a `py::module` which is used to bind functions and classes
+        // `m` is a `py::module_` which is used to bind functions and classes
         m.def("add", [](int i, int j) {
             return i + j;
         });
@@ -162,7 +162,7 @@
     int main() {
         py::scoped_interpreter guard{};
 
-        auto fast_calc = py::module::import("fast_calc");
+        auto fast_calc = py::module_::import("fast_calc");
         auto result = fast_calc.attr("add")(1, 2).cast<int>();
         assert(result == 3);
     }
@@ -196,7 +196,7 @@
     int main() {
         py::scoped_interpreter guard{};
 
-        auto py_module = py::module::import("py_module");
+        auto py_module = py::module_::import("py_module");
 
         auto locals = py::dict("fmt"_a="{} + {} = {}", **py_module.attr("__dict__"));
         assert(locals["a"].cast<int>() == 1);
diff --git a/docs/advanced/exceptions.rst b/docs/advanced/exceptions.rst
index 75ad7f7..5eae556 100644
--- a/docs/advanced/exceptions.rst
+++ b/docs/advanced/exceptions.rst
@@ -1,18 +1,24 @@
 Exceptions
 ##########
 
-Built-in exception translation
-==============================
+Built-in C++ to Python exception translation
+============================================
 
-When C++ code invoked from Python throws an ``std::exception``, it is
-automatically converted into a Python ``Exception``. pybind11 defines multiple
-special exception classes that will map to different types of Python
-exceptions:
+When Python calls C++ code through pybind11, pybind11 provides a C++ exception handler
+that will trap C++ exceptions, translate them to the corresponding Python exception,
+and raise them so that Python code can handle them.
+
+pybind11 defines translations for ``std::exception`` and its standard
+subclasses, and several special exception classes that translate to specific
+Python exceptions. Note that these are not actually Python exceptions, so they
+cannot be examined using the Python C API. Instead, they are pure C++ objects
+that pybind11 will translate to the corresponding Python exception when they arrive
+at its exception handler.
 
 .. tabularcolumns:: |p{0.5\textwidth}|p{0.45\textwidth}|
 
 +--------------------------------------+--------------------------------------+
-|  C++ exception type                  |  Python exception type               |
+|  Exception thrown by C++             |  Translated to Python exception type |
 +======================================+======================================+
 | :class:`std::exception`              | ``RuntimeError``                     |
 +--------------------------------------+--------------------------------------+
@@ -46,16 +52,11 @@
 |                                      | ``__setitem__`` in dict-like         |
 |                                      | objects, etc.)                       |
 +--------------------------------------+--------------------------------------+
-| :class:`pybind11::error_already_set` | Indicates that the Python exception  |
-|                                      | flag has already been set via Python |
-|                                      | API calls from C++ code; this C++    |
-|                                      | exception is used to propagate such  |
-|                                      | a Python exception back to Python.   |
-+--------------------------------------+--------------------------------------+
 
-When a Python function invoked from C++ throws an exception, it is converted
-into a C++ exception of type :class:`error_already_set` whose string payload
-contains a textual summary.
+Exception translation is not bidirectional. That is, *catching* the C++
+exceptions defined above will not trap exceptions that originate from
+Python. For that, catch :class:`pybind11::error_already_set`. See :ref:`below
+<handling_python_exceptions_cpp>` for further details.
 
 There is also a special exception :class:`cast_error` that is thrown by
 :func:`handle::call` when the input arguments cannot be converted to Python
@@ -78,6 +79,19 @@
 module and automatically converts any encountered exceptions of type ``CppExp``
 into Python exceptions of type ``PyExp``.
 
+It is possible to specify a base class for the exception using the third
+parameter, a `handle`:
+
+.. code-block:: cpp
+
+    py::register_exception<CppExp>(module, "PyExp", PyExc_RuntimeError);
+
+Then `PyExp` can be caught both as `PyExp` and `RuntimeError`.
+
+The class objects of the built-in Python exceptions are listed in the Python
+documentation on `Standard Exceptions <https://docs.python.org/3/c-api/exceptions.html#standard-exceptions>`_.
+The default base class is `PyExc_Exception`.
+
 When more advanced exception translation is needed, the function
 ``py::register_exception_translator(translator)`` can be used to register
 functions that can translate arbitrary exception types (and which may include
@@ -100,7 +114,6 @@
 to make this a static declaration when using it inside a lambda expression
 without requiring capturing).
 
-
 The following example demonstrates this for a hypothetical exception classes
 ``MyCustomException`` and ``OtherException``: the first is translated to a
 custom python exception ``MyCustomError``, while the second is translated to a
@@ -134,7 +147,7 @@
 
 .. note::
 
-    You must call either ``PyErr_SetString`` or a custom exception's call
+    Call either ``PyErr_SetString`` or a custom exception's call
     operator (``exc(string)``) for every exception caught in a custom exception
     translator.  Failure to do so will cause Python to crash with ``SystemError:
     error return without exception set``.
@@ -142,3 +155,144 @@
     Exceptions that you do not plan to handle should simply not be caught, or
     may be explicitly (re-)thrown to delegate it to the other,
     previously-declared existing exception translators.
+
+.. _handling_python_exceptions_cpp:
+
+Handling exceptions from Python in C++
+======================================
+
+When C++ calls Python functions, such as in a callback function or when
+manipulating Python objects, and Python raises an ``Exception``, pybind11
+converts the Python exception into a C++ exception of type
+:class:`pybind11::error_already_set` whose payload contains a C++ string textual
+summary and the actual Python exception. ``error_already_set`` is used to
+propagate Python exceptions back to Python (or possibly, handle them in C++).
+
+.. tabularcolumns:: |p{0.5\textwidth}|p{0.45\textwidth}|
+
++--------------------------------------+--------------------------------------+
+|  Exception raised in Python          |  Thrown as C++ exception type        |
++======================================+======================================+
+| Any Python ``Exception``             | :class:`pybind11::error_already_set` |
++--------------------------------------+--------------------------------------+
+
+For example:
+
+.. code-block:: cpp
+
+    try {
+        // open("missing.txt", "r")
+        auto file = py::module_::import("io").attr("open")("missing.txt", "r");
+        auto text = file.attr("read")();
+        file.attr("close")();
+    } catch (py::error_already_set &e) {
+        if (e.matches(PyExc_FileNotFoundError)) {
+            py::print("missing.txt not found");
+        } else if (e.matches(PyExc_PermissionError)) {
+            py::print("missing.txt found but not accessible");
+        } else {
+            throw;
+        }
+    }
+
+Note that C++ to Python exception translation does not apply here, since that is
+a method for translating C++ exceptions to Python, not vice versa. The error raised
+from Python is always ``error_already_set``.
+
+This example illustrates this behavior:
+
+.. code-block:: cpp
+
+    try {
+        py::eval("raise ValueError('The Ring')");
+    } catch (py::value_error &boromir) {
+        // Boromir never gets the ring
+        assert(false);
+    } catch (py::error_already_set &frodo) {
+        // Frodo gets the ring
+        py::print("I will take the ring");
+    }
+
+    try {
+        // py::value_error is a request for pybind11 to raise a Python exception
+        throw py::value_error("The ball");
+    } catch (py::error_already_set &cat) {
+        // cat won't catch the ball since
+        // py::value_error is not a Python exception
+        assert(false);
+    } catch (py::value_error &dog) {
+        // dog will catch the ball
+        py::print("Run Spot run");
+        throw;  // Throw it again (pybind11 will raise ValueError)
+    }
+
+Handling errors from the Python C API
+=====================================
+
+Where possible, use :ref:`pybind11 wrappers <wrappers>` instead of calling
+the Python C API directly. When calling the Python C API directly, in
+addition to manually managing reference counts, one must follow the pybind11
+error protocol, which is outlined here.
+
+After calling the Python C API, if Python returns an error,
+``throw py::error_already_set();``, which allows pybind11 to deal with the
+exception and pass it back to the Python interpreter. This includes calls to
+the error setting functions such as ``PyErr_SetString``.
+
+.. code-block:: cpp
+
+    PyErr_SetString(PyExc_TypeError, "C API type error demo");
+    throw py::error_already_set();
+
+    // But it would be easier to simply...
+    throw py::type_error("pybind11 wrapper type error");
+
+Alternately, to ignore the error, call `PyErr_Clear
+<https://docs.python.org/3/c-api/exceptions.html#c.PyErr_Clear>`_.
+
+Any Python error must be thrown or cleared, or Python/pybind11 will be left in
+an invalid state.
+
+.. _unraisable_exceptions:
+
+Handling unraisable exceptions
+==============================
+
+If a Python function invoked from a C++ destructor or any function marked
+``noexcept(true)`` (collectively, "noexcept functions") throws an exception, there
+is no way to propagate the exception, as such functions may not throw.
+Should they throw or fail to catch any exceptions in their call graph,
+the C++ runtime calls ``std::terminate()`` to abort immediately.
+
+Similarly, Python exceptions raised in a class's ``__del__`` method do not
+propagate, but are logged by Python as an unraisable error. In Python 3.8+, a
+`system hook is triggered
+<https://docs.python.org/3/library/sys.html#sys.unraisablehook>`_
+and an auditing event is logged.
+
+Any noexcept function should have a try-catch block that traps
+:class:`error_already_set` (or any other exception that can occur). Note that
+pybind11 wrappers around Python exceptions such as
+:class:`pybind11::value_error` are *not* Python exceptions; they are C++
+exceptions that pybind11 catches and converts to Python exceptions. Noexcept
+functions cannot propagate these exceptions either. A useful approach is to
+convert them to Python exceptions and then ``discard_as_unraisable`` as shown
+below.
+
+.. code-block:: cpp
+
+    void nonthrowing_func() noexcept(true) {
+        try {
+            // ...
+        } catch (py::error_already_set &eas) {
+            // Discard the Python error using Python APIs, using the C++ magic
+            // variable __func__. Python already knows the type and value of the
+            // exception object.
+            eas.discard_as_unraisable(__func__);
+        } catch (const std::exception &e) {
+            // Log and discard C++ exceptions.
+            third_party::log(e);
+        }
+    }
+
+.. versionadded:: 2.6
diff --git a/docs/advanced/functions.rst b/docs/advanced/functions.rst
index 3e1a3ff..ebdff9c 100644
--- a/docs/advanced/functions.rst
+++ b/docs/advanced/functions.rst
@@ -17,7 +17,7 @@
 type information, it is not clear whether Python should take charge of the
 returned value and eventually free its resources, or if this is handled on the
 C++ side. For this reason, pybind11 provides a several *return value policy*
-annotations that can be passed to the :func:`module::def` and
+annotations that can be passed to the :func:`module_::def` and
 :func:`class_::def` functions. The default policy is
 :enum:`return_value_policy::automatic`.
 
@@ -360,7 +360,55 @@
 .. code-block:: cpp
 
     py::class_<MyClass>("MyClass")
-        .def("myFunction", py::arg("arg") = (SomeType *) nullptr);
+        .def("myFunction", py::arg("arg") = static_cast<SomeType *>(nullptr));
+
+Keyword-only arguments
+======================
+
+Python 3 introduced keyword-only arguments by specifying an unnamed ``*``
+argument in a function definition:
+
+.. code-block:: python
+
+    def f(a, *, b):  # a can be positional or via keyword; b must be via keyword
+        pass
+
+    f(a=1, b=2)  # good
+    f(b=2, a=1)  # good
+    f(1, b=2)    # good
+    f(1, 2)      # TypeError: f() takes 1 positional argument but 2 were given
+
+Pybind11 provides a ``py::kw_only`` object that allows you to implement
+the same behaviour by specifying the object between positional and keyword-only
+argument annotations when registering the function:
+
+.. code-block:: cpp
+
+    m.def("f", [](int a, int b) { /* ... */ },
+          py::arg("a"), py::kw_only(), py::arg("b"));
+
+Note that you currently cannot combine this with a ``py::args`` argument.  This
+feature does *not* require Python 3 to work.
+
+.. versionadded:: 2.6
+
+Positional-only arguments
+=========================
+
+Python 3.8 introduced a new positional-only argument syntax, using ``/`` in the
+function definition (note that this has been a convention for CPython
+positional arguments, such as in ``pow()``, since Python 2). You can
+do the same thing in any version of Python using ``py::pos_only()``:
+
+.. code-block:: cpp
+
+    m.def("f", [](int a, int b) { /* ... */ },
+          py::arg("a"), py::pos_only(), py::arg("b"));
+
+You now cannot give argument ``a`` by keyword. This can be combined with
+keyword-only arguments, as well.
+
+.. versionadded:: 2.6
 
 .. _nonconverting_arguments:
 
@@ -492,11 +540,13 @@
 If the second pass also fails a ``TypeError`` is raised.
 
 Within each pass, overloads are tried in the order they were registered with
-pybind11.
+pybind11. If the ``py::prepend()`` tag is added to the definition, a function
+can be placed at the beginning of the overload sequence instead, allowing user
+overloads to precede built-in functions.
 
 What this means in practice is that pybind11 will prefer any overload that does
-not require conversion of arguments to an overload that does, but otherwise prefers
-earlier-defined overloads to later-defined ones.
+not require conversion of arguments to an overload that does, but otherwise
+prefers earlier-defined overloads to later-defined ones.
 
 .. note::
 
@@ -505,3 +555,7 @@
     requiring one conversion over one requiring three, but only prioritizes
     overloads requiring no conversion at all to overloads that require
     conversion of at least one argument.
+
+.. versionadded:: 2.6
+
+    The ``py::prepend()`` tag.
diff --git a/docs/advanced/misc.rst b/docs/advanced/misc.rst
index 5b38ec7..b3f3b22 100644
--- a/docs/advanced/misc.rst
+++ b/docs/advanced/misc.rst
@@ -7,14 +7,14 @@
 ==========================================
 
 pybind11 provides a few convenience macros such as
-:func:`PYBIND11_DECLARE_HOLDER_TYPE` and ``PYBIND11_OVERLOAD_*``. Since these
+:func:`PYBIND11_DECLARE_HOLDER_TYPE` and ``PYBIND11_OVERRIDE_*``. Since these
 are "just" macros that are evaluated in the preprocessor (which has no concept
 of types), they *will* get confused by commas in a template argument; for
 example, consider:
 
 .. code-block:: cpp
 
-    PYBIND11_OVERLOAD(MyReturnType<T1, T2>, Class<T3, T4>, func)
+    PYBIND11_OVERRIDE(MyReturnType<T1, T2>, Class<T3, T4>, func)
 
 The limitation of the C preprocessor interprets this as five arguments (with new
 arguments beginning after each comma) rather than three.  To get around this,
@@ -26,10 +26,10 @@
     // Version 1: using a type alias
     using ReturnType = MyReturnType<T1, T2>;
     using ClassType = Class<T3, T4>;
-    PYBIND11_OVERLOAD(ReturnType, ClassType, func);
+    PYBIND11_OVERRIDE(ReturnType, ClassType, func);
 
     // Version 2: using the PYBIND11_TYPE macro:
-    PYBIND11_OVERLOAD(PYBIND11_TYPE(MyReturnType<T1, T2>),
+    PYBIND11_OVERRIDE(PYBIND11_TYPE(MyReturnType<T1, T2>),
                       PYBIND11_TYPE(Class<T3, T4>), func)
 
 The ``PYBIND11_MAKE_OPAQUE`` macro does *not* require the above workarounds.
@@ -59,7 +59,7 @@
             /* Acquire GIL before calling Python code */
             py::gil_scoped_acquire acquire;
 
-            PYBIND11_OVERLOAD_PURE(
+            PYBIND11_OVERRIDE_PURE(
                 std::string, /* Return type */
                 Animal,      /* Parent class */
                 go,          /* Name of function */
@@ -132,7 +132,7 @@
 
 .. code-block:: cpp
 
-    py::object pet = (py::object) py::module::import("basic").attr("Pet");
+    py::object pet = (py::object) py::module_::import("basic").attr("Pet");
 
     py::class_<Dog>(m, "Dog", pet)
         .def(py::init<const std::string &>())
@@ -146,7 +146,7 @@
 
 .. code-block:: cpp
 
-    py::module::import("basic");
+    py::module_::import("basic");
 
     py::class_<Dog, Pet>(m, "Dog")
         .def(py::init<const std::string &>())
@@ -176,9 +176,9 @@
 
 .. code-block:: cpp
 
-    auto data = (MyData *) py::get_shared_data("mydata");
+    auto data = static_cast<MyData *>(py::get_shared_data("mydata"));
     if (!data)
-        data = (MyData *) py::set_shared_data("mydata", new MyData(42));
+        data = static_cast<MyData *>(py::set_shared_data("mydata", new MyData(42)));
 
 If the above snippet was used in several separately compiled extension modules,
 the first one to be imported would create a ``MyData`` instance and associate
@@ -218,12 +218,12 @@
 
 Both approaches also expose a potentially dangerous ``_cleanup`` attribute in
 Python, which may be undesirable from an API standpoint (a premature explicit
-call from Python might lead to undefined behavior). Yet another approach that 
+call from Python might lead to undefined behavior). Yet another approach that
 avoids this issue involves weak reference with a cleanup callback:
 
 .. code-block:: cpp
 
-    // Register a callback function that is invoked when the BaseClass object is colelcted
+    // Register a callback function that is invoked when the BaseClass object is collected
     py::cpp_function cleanup_callback(
         [](py::handle weakref) {
             // perform cleanup here -- this function is called with the GIL held
@@ -237,13 +237,13 @@
 
 .. note::
 
-    PyPy (at least version 5.9) does not garbage collect objects when the
-    interpreter exits. An alternative approach (which also works on CPython) is to use
-    the :py:mod:`atexit` module [#f7]_, for example:
+    PyPy does not garbage collect objects when the interpreter exits. An alternative
+    approach (which also works on CPython) is to use the :py:mod:`atexit` module [#f7]_,
+    for example:
 
     .. code-block:: cpp
 
-        auto atexit = py::module::import("atexit");
+        auto atexit = py::module_::import("atexit");
         atexit.attr("register")(py::cpp_function([]() {
             // perform cleanup here -- this function is called with the GIL held
         }));
@@ -283,9 +283,9 @@
         ----------
     )mydelimiter");
 
-By default, pybind11 automatically generates and prepends a signature to the docstring of a function 
-registered with ``module::def()`` and ``class_::def()``. Sometimes this
-behavior is not desirable, because you want to provide your own signature or remove 
+By default, pybind11 automatically generates and prepends a signature to the docstring of a function
+registered with ``module_::def()`` and ``class_::def()``. Sometimes this
+behavior is not desirable, because you want to provide your own signature or remove
 the docstring completely to exclude the function from the Sphinx documentation.
 The class ``options`` allows you to selectively suppress auto-generated signatures:
 
@@ -298,9 +298,40 @@
         m.def("add", [](int a, int b) { return a + b; }, "A function which adds two numbers");
     }
 
-Note that changes to the settings affect only function bindings created during the 
-lifetime of the ``options`` instance. When it goes out of scope at the end of the module's init function, 
+Note that changes to the settings affect only function bindings created during the
+lifetime of the ``options`` instance. When it goes out of scope at the end of the module's init function,
 the default settings are restored to prevent unwanted side effects.
 
 .. [#f4] http://www.sphinx-doc.org
 .. [#f5] http://github.com/pybind/python_example
+
+.. _avoiding-cpp-types-in-docstrings:
+
+Avoiding C++ types in docstrings
+================================
+
+Docstrings are generated at the time of the declaration, e.g. when ``.def(...)`` is called.
+At this point parameter and return types should be known to pybind11.
+If a custom type is not exposed yet through a ``py::class_`` constructor or a custom type caster,
+its C++ type name will be used instead to generate the signature in the docstring:
+
+.. code-block:: text
+
+     |  __init__(...)
+     |      __init__(self: example.Foo, arg0: ns::Bar) -> None
+                                              ^^^^^^^
+
+
+This limitation can be circumvented by ensuring that C++ classes are registered with pybind11
+before they are used as a parameter or return type of a function:
+
+.. code-block:: cpp
+
+    PYBIND11_MODULE(example, m) {
+
+        auto pyFoo = py::class_<ns::Foo>(m, "Foo");
+        auto pyBar = py::class_<ns::Bar>(m, "Bar");
+
+        pyFoo.def(py::init<const ns::Bar&>());
+        pyBar.def(py::init<const ns::Foo&>());
+    }
diff --git a/docs/advanced/pycpp/numpy.rst b/docs/advanced/pycpp/numpy.rst
index 458f99e..19ed10b 100644
--- a/docs/advanced/pycpp/numpy.rst
+++ b/docs/advanced/pycpp/numpy.rst
@@ -57,17 +57,17 @@
 
     struct buffer_info {
         void *ptr;
-        ssize_t itemsize;
+        py::ssize_t itemsize;
         std::string format;
-        ssize_t ndim;
-        std::vector<ssize_t> shape;
-        std::vector<ssize_t> strides;
+        py::ssize_t ndim;
+        std::vector<py::ssize_t> shape;
+        std::vector<py::ssize_t> strides;
     };
 
 To create a C++ function that can take a Python buffer object as an argument,
 simply use the type ``py::buffer`` as one of its arguments. Buffers can exist
 in a great variety of configurations, hence some safety checks are usually
-necessary in the function body. Below, you can see an basic example on how to
+necessary in the function body. Below, you can see a basic example on how to
 define a custom constructor for the Eigen double precision matrix
 (``Eigen::MatrixXd``) type, which supports initialization from compatible
 buffer objects (e.g. a NumPy matrix).
@@ -81,7 +81,7 @@
     constexpr bool rowMajor = Matrix::Flags & Eigen::RowMajorBit;
 
     py::class_<Matrix>(m, "Matrix", py::buffer_protocol())
-        .def("__init__", [](Matrix &m, py::buffer b) {
+        .def(py::init([](py::buffer b) {
             typedef Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic> Strides;
 
             /* Request a buffer descriptor from Python */
@@ -101,8 +101,8 @@
             auto map = Eigen::Map<Matrix, 0, Strides>(
                 static_cast<Scalar *>(info.ptr), info.shape[0], info.shape[1], strides);
 
-            new (&m) Matrix(map);
-        });
+            return Matrix(map);
+        }));
 
 For reference, the ``def_buffer()`` call for this Eigen data type should look
 as follows:
@@ -274,9 +274,9 @@
 
         py::buffer_info buf3 = result.request();
 
-        double *ptr1 = (double *) buf1.ptr,
-               *ptr2 = (double *) buf2.ptr,
-               *ptr3 = (double *) buf3.ptr;
+        double *ptr1 = static_cast<double *>(buf1.ptr);
+        double *ptr2 = static_cast<double *>(buf2.ptr);
+        double *ptr3 = static_cast<double *>(buf3.ptr);
 
         for (size_t idx = 0; idx < buf1.shape[0]; idx++)
             ptr3[idx] = ptr1[idx] + ptr2[idx];
@@ -309,17 +309,17 @@
     m.def("sum_3d", [](py::array_t<double> x) {
         auto r = x.unchecked<3>(); // x must have ndim = 3; can be non-writeable
         double sum = 0;
-        for (ssize_t i = 0; i < r.shape(0); i++)
-            for (ssize_t j = 0; j < r.shape(1); j++)
-                for (ssize_t k = 0; k < r.shape(2); k++)
+        for (py::ssize_t i = 0; i < r.shape(0); i++)
+            for (py::ssize_t j = 0; j < r.shape(1); j++)
+                for (py::ssize_t k = 0; k < r.shape(2); k++)
                     sum += r(i, j, k);
         return sum;
     });
     m.def("increment_3d", [](py::array_t<double> x) {
         auto r = x.mutable_unchecked<3>(); // Will throw if ndim != 3 or flags.writeable is false
-        for (ssize_t i = 0; i < r.shape(0); i++)
-            for (ssize_t j = 0; j < r.shape(1); j++)
-                for (ssize_t k = 0; k < r.shape(2); k++)
+        for (py::ssize_t i = 0; i < r.shape(0); i++)
+            for (py::ssize_t j = 0; j < r.shape(1); j++)
+                for (py::ssize_t k = 0; k < r.shape(2); k++)
                     r(i, j, k) += 1.0;
     }, py::arg().noconvert());
 
@@ -371,6 +371,8 @@
 Python 3 provides a convenient ``...`` ellipsis notation that is often used to
 slice multidimensional arrays. For instance, the following snippet extracts the
 middle dimensions of a tensor with the first and last index set to zero.
+In Python 2, the syntactic sugar ``...`` is not available, but the singleton
+``Ellipsis`` (of type ``ellipsis``) can still be used directly.
 
 .. code-block:: python
 
@@ -384,3 +386,51 @@
 
    py::array a = /* A NumPy array */;
    py::array b = a[py::make_tuple(0, py::ellipsis(), 0)];
+
+.. versionchanged:: 2.6
+   ``py::ellipsis()`` is now also available in Python 2.
+
+Memory view
+===========
+
+For a case when we simply want to provide a direct accessor to a C/C++ buffer
+without a concrete class object, we can return a ``memoryview`` object. Suppose
+we wish to expose a ``memoryview`` for a 2x4 ``uint8_t`` array; we can do the
+following:
+
+.. code-block:: cpp
+
+    const uint8_t buffer[] = {
+        0, 1, 2, 3,
+        4, 5, 6, 7
+    };
+    m.def("get_memoryview2d", []() {
+        return py::memoryview::from_buffer(
+            buffer,                                    // buffer pointer
+            { 2, 4 },                                  // shape (rows, cols)
+            { sizeof(uint8_t) * 4, sizeof(uint8_t) }   // strides in bytes
+        );
+    });
+
+This approach is meant for providing a ``memoryview`` for a C/C++ buffer not
+managed by Python. The user is responsible for managing the lifetime of the
+buffer. Using a ``memoryview`` created in this way after deleting the buffer on
+the C++ side results in undefined behavior.
+
+We can also use ``memoryview::from_memory`` for a simple 1D contiguous buffer:
+
+.. code-block:: cpp
+
+    m.def("get_memoryview1d", []() {
+        return py::memoryview::from_memory(
+            buffer,               // buffer pointer
+            sizeof(uint8_t) * 8   // buffer size
+        );
+    });
+
+.. note::
+
+    ``memoryview::from_memory`` is not available in Python 2.
+
+.. versionchanged:: 2.6
+    ``memoryview::from_memory`` added.
diff --git a/docs/advanced/pycpp/object.rst b/docs/advanced/pycpp/object.rst
index 117131e..6c7525c 100644
--- a/docs/advanced/pycpp/object.rst
+++ b/docs/advanced/pycpp/object.rst
@@ -1,6 +1,8 @@
 Python types
 ############
 
+.. _wrappers:
+
 Available wrappers
 ==================
 
@@ -13,6 +15,13 @@
 :class:`iterable`, :class:`iterator`, :class:`function`, :class:`buffer`,
 :class:`array`, and :class:`array_t`.
 
+.. warning::
+
+    Be sure to review the :ref:`pytypes_gotchas` before using this heavily in
+    your C++ API.
+
+.. _casting_back_and_forth:
+
 Casting back and forth
 ======================
 
@@ -47,20 +56,21 @@
 .. code-block:: cpp
 
     // Equivalent to "from decimal import Decimal"
-    py::object Decimal = py::module::import("decimal").attr("Decimal");
+    py::object Decimal = py::module_::import("decimal").attr("Decimal");
 
 .. code-block:: cpp
 
     // Try to import scipy
-    py::object scipy = py::module::import("scipy");
+    py::object scipy = py::module_::import("scipy");
     return scipy.attr("__version__");
 
+
 .. _calling_python_functions:
 
 Calling Python functions
 ========================
 
-It is also possible to call Python classes, functions and methods 
+It is also possible to call Python classes, functions and methods
 via ``operator()``.
 
 .. code-block:: cpp
@@ -71,11 +81,11 @@
 .. code-block:: cpp
 
     // Use Python to make our directories
-    py::object os = py::module::import("os");
+    py::object os = py::module_::import("os");
     py::object makedirs = os.attr("makedirs");
     makedirs("/tmp/path/to/somewhere");
 
-One can convert the result obtained from Python to a pure C++ version 
+One can convert the result obtained from Python to a pure C++ version
 if a ``py::class_`` or type conversion is defined.
 
 .. code-block:: cpp
@@ -99,8 +109,8 @@
     py::print(py::str(exp_pi));
 
 In the example above ``pi.attr("exp")`` is a *bound method*: it will always call
-the method for that same instance of the class. Alternately one can create an 
-*unbound method* via the Python class (instead of instance) and pass the ``self`` 
+the method for that same instance of the class. Alternately one can create an
+*unbound method* via the Python class (instead of instance) and pass the ``self``
 object explicitly, followed by other arguments.
 
 .. code-block:: cpp
@@ -168,3 +178,74 @@
     Python functions from C++, including keywords arguments and unpacking.
 
 .. _PEP448: https://www.python.org/dev/peps/pep-0448/
+
+.. _implicit_casting:
+
+Implicit casting
+================
+
+When using the C++ interface for Python types, or calling Python functions,
+objects of type :class:`object` are returned. It is possible to invoke implicit
+conversions to subclasses like :class:`dict`. The same holds for the proxy objects
+returned by ``operator[]`` or ``obj.attr()``.
+Casting to subtypes improves code readability and allows values to be passed to
+C++ functions that require a specific subtype rather than a generic :class:`object`.
+
+.. code-block:: cpp
+
+    #include <pybind11/numpy.h>
+    using namespace pybind11::literals;
+
+    py::module_ os = py::module_::import("os");
+    py::module_ path = py::module_::import("os.path");  // like 'import os.path as path'
+    py::module_ np = py::module_::import("numpy");  // like 'import numpy as np'
+
+    py::str curdir_abs = path.attr("abspath")(path.attr("curdir"));
+    py::print(py::str("Current directory: ") + curdir_abs);
+    py::dict environ = os.attr("environ");
+    py::print(environ["HOME"]);
+    py::array_t<float> arr = np.attr("ones")(3, "dtype"_a="float32");
+    py::print(py::repr(arr + py::int_(1)));
+
+These implicit conversions are available for subclasses of :class:`object`; there
+is no need to call ``obj.cast()`` explicitly as for custom classes, see
+:ref:`casting_back_and_forth`.
+
+.. note::
+    If a trivial conversion via move constructor is not possible, both implicit and
+    explicit casting (calling ``obj.cast()``) will attempt a "rich" conversion.
+    For instance, ``py::list env = os.attr("environ");`` will succeed and is
+    equivalent to the Python code ``env = list(os.environ)`` that produces a
+    list of the dict keys.
+
+..  TODO: Adapt text once PR #2349 has landed
+
+Handling exceptions
+===================
+
+Python exceptions from wrapper classes will be thrown as a ``py::error_already_set``.
+See :ref:`Handling exceptions from Python in C++
+<handling_python_exceptions_cpp>` for more information on handling exceptions
+raised when calling C++ wrapper classes.
+
+.. _pytypes_gotchas:
+
+Gotchas
+=======
+
+Default-Constructed Wrappers
+----------------------------
+
+When a wrapper type is default-constructed, it is **not** a valid Python object
+(i.e. it is not ``py::none()``). It is simply the same as a ``PyObject*`` null
+pointer. To check for this, use ``static_cast<bool>(my_wrapper)``.
+
+Assigning py::none() to wrappers
+--------------------------------
+
+You may be tempted to use types like ``py::str`` and ``py::dict`` in C++
+signatures (either pure C++, or in bound signatures), and assign them default
+values of ``py::none()``. However, in a best case scenario, it will fail fast
+because ``None`` is not convertible to that type (e.g. ``py::dict``), or in a
+worst case scenario, it will silently work but corrupt the types you want to
+work with (e.g. ``py::str(py::none())`` will yield ``"None"`` in Python).
diff --git a/docs/advanced/pycpp/utilities.rst b/docs/advanced/pycpp/utilities.rst
index 369e7c9..c15051f 100644
--- a/docs/advanced/pycpp/utilities.rst
+++ b/docs/advanced/pycpp/utilities.rst
@@ -42,7 +42,7 @@
     m.def("noisy_func", []() {
         py::scoped_ostream_redirect stream(
             std::cout,                               // std::ostream&
-            py::module::import("sys").attr("stdout") // Python output
+            py::module_::import("sys").attr("stdout") // Python output
         );
         call_noisy_func();
     });
@@ -104,7 +104,7 @@
     ...
 
     // Evaluate in scope of main module
-    py::object scope = py::module::import("__main__").attr("__dict__");
+    py::object scope = py::module_::import("__main__").attr("__dict__");
 
     // Evaluate an isolated expression
     int result = py::eval("my_variable + 10", scope).cast<int>();
diff --git a/docs/basics.rst b/docs/basics.rst
index 7bf4d42..b9d386c 100644
--- a/docs/basics.rst
+++ b/docs/basics.rst
@@ -11,11 +11,11 @@
 Compiling the test cases
 ========================
 
-Linux/MacOS
+Linux/macOS
 -----------
 
 On Linux  you'll need to install the **python-dev** or **python3-dev** packages as
-well as **cmake**. On Mac OS, the included python version works out of the box,
+well as **cmake**. On macOS, the included python version works out of the box,
 but **cmake** must still be installed.
 
 After installing the prerequisites, run
@@ -35,6 +35,14 @@
 On Windows, only **Visual Studio 2015** and newer are supported since pybind11 relies
 on various C++11 language features that break older versions of Visual Studio.
 
+.. Note::
+
+    To use C++17 in Visual Studio 2017 (MSVC 14.1), pybind11 requires the flag
+    ``/permissive-`` to be passed to the compiler `to enforce standard conformance`_. When
+    building with Visual Studio 2019, this is not strictly necessary, but still advised.
+
+..  _`to enforce standard conformance`: https://docs.microsoft.com/en-us/cpp/build/reference/permissive-standards-conformance?view=vs-2017
+
 To compile and run the tests:
 
 .. code-block:: batch
@@ -110,8 +118,8 @@
 The :func:`PYBIND11_MODULE` macro creates a function that will be called when an
 ``import`` statement is issued from within Python. The module name (``example``)
 is given as the first macro argument (it should not be in quotes). The second
-argument (``m``) defines a variable of type :class:`py::module <module>` which
-is the main interface for creating bindings. The method :func:`module::def`
+argument (``m``) defines a variable of type :class:`py::module_ <module_>` which
+is the main interface for creating bindings. The method :func:`module_::def`
 generates binding code that exposes the ``add()`` function to Python.
 
 .. note::
@@ -130,7 +138,7 @@
 
     $ c++ -O3 -Wall -shared -std=c++11 -fPIC `python3 -m pybind11 --includes` example.cpp -o example`python3-config --extension-suffix`
 
-For more details on the required compiler flags on Linux and MacOS, see
+For more details on the required compiler flags on Linux and macOS, see
 :ref:`building_manually`. For complete cross-platform compilation instructions,
 refer to the :ref:`compiling` page.
 
@@ -173,7 +181,7 @@
           py::arg("i"), py::arg("j"));
 
 :class:`arg` is one of several special tag classes which can be used to pass
-metadata into :func:`module::def`. With this modified binding code, we can now
+metadata into :func:`module_::def`. With this modified binding code, we can now
 call the function using keyword arguments, which is a more readable alternative
 particularly for functions taking many parameters:
 
diff --git a/docs/benchmark.py b/docs/benchmark.py
index 6dc0604..33d78fb 100644
--- a/docs/benchmark.py
+++ b/docs/benchmark.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 import random
 import os
 import time
@@ -13,7 +14,7 @@
 
     for cl in range(nclasses):
         decl += "class cl%03i;\n" % cl
-    decl += '\n'
+    decl += "\n"
 
     for cl in range(nclasses):
         decl += "class cl%03i {\n" % cl
@@ -21,18 +22,17 @@
         bindings += '    py::class_<cl%03i>(m, "cl%03i")\n' % (cl, cl)
         for fn in range(nfns):
             ret = random.randint(0, nclasses - 1)
-            params  = [random.randint(0, nclasses - 1) for i in range(nargs)]
+            params = [random.randint(0, nclasses - 1) for i in range(nargs)]
             decl += "    cl%03i *fn_%03i(" % (ret, fn)
             decl += ", ".join("cl%03i *" % p for p in params)
             decl += ");\n"
-            bindings += '        .def("fn_%03i", &cl%03i::fn_%03i)\n' % \
-                (fn, cl, fn)
+            bindings += '        .def("fn_%03i", &cl%03i::fn_%03i)\n' % (fn, cl, fn)
         decl += "};\n\n"
-        bindings += '        ;\n'
+        bindings += "        ;\n"
 
     result = "#include <pybind11/pybind11.h>\n\n"
     result += "namespace py = pybind11;\n\n"
-    result += decl + '\n'
+    result += decl + "\n"
     result += "PYBIND11_MODULE(example, m) {\n"
     result += bindings
     result += "}"
@@ -45,7 +45,7 @@
 
     for cl in range(nclasses):
         decl += "class cl%03i;\n" % cl
-    decl += '\n'
+    decl += "\n"
 
     for cl in range(nclasses):
         decl += "class cl%03i {\n" % cl
@@ -53,18 +53,20 @@
         bindings += '    py::class_<cl%03i>("cl%03i")\n' % (cl, cl)
         for fn in range(nfns):
             ret = random.randint(0, nclasses - 1)
-            params  = [random.randint(0, nclasses - 1) for i in range(nargs)]
+            params = [random.randint(0, nclasses - 1) for i in range(nargs)]
             decl += "    cl%03i *fn_%03i(" % (ret, fn)
             decl += ", ".join("cl%03i *" % p for p in params)
             decl += ");\n"
-            bindings += '        .def("fn_%03i", &cl%03i::fn_%03i, py::return_value_policy<py::manage_new_object>())\n' % \
-                (fn, cl, fn)
+            bindings += (
+                '        .def("fn_%03i", &cl%03i::fn_%03i, py::return_value_policy<py::manage_new_object>())\n'
+                % (fn, cl, fn)
+            )
         decl += "};\n\n"
-        bindings += '        ;\n'
+        bindings += "        ;\n"
 
     result = "#include <boost/python.hpp>\n\n"
     result += "namespace py = boost::python;\n\n"
-    result += decl + '\n'
+    result += decl + "\n"
     result += "BOOST_PYTHON_MODULE(example) {\n"
     result += bindings
     result += "}"
@@ -72,17 +74,19 @@
 
 
 for codegen in [generate_dummy_code_pybind11, generate_dummy_code_boost]:
-    print ("{")
+    print("{")
     for i in range(0, 10):
         nclasses = 2 ** i
         with open("test.cpp", "w") as f:
             f.write(codegen(nclasses))
         n1 = dt.datetime.now()
-        os.system("g++ -Os -shared -rdynamic -undefined dynamic_lookup "
+        os.system(
+            "g++ -Os -shared -rdynamic -undefined dynamic_lookup "
             "-fvisibility=hidden -std=c++14 test.cpp -I include "
-            "-I /System/Library/Frameworks/Python.framework/Headers -o test.so")
+            "-I /System/Library/Frameworks/Python.framework/Headers -o test.so"
+        )
         n2 = dt.datetime.now()
         elapsed = (n2 - n1).total_seconds()
-        size = os.stat('test.so').st_size
+        size = os.stat("test.so").st_size
         print("   {%i, %f, %i}," % (nclasses * nfns, elapsed, size))
-    print ("}")
+    print("}")
diff --git a/docs/benchmark.rst b/docs/benchmark.rst
index 59d533d..02c2ccd 100644
--- a/docs/benchmark.rst
+++ b/docs/benchmark.rst
@@ -93,5 +93,3 @@
 .. only:: latex
 
     .. image:: pybind11_vs_boost_python2.png
-
-
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 2def2b0..561baa5 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -6,6 +6,278 @@
 Starting with version 1.8.0, pybind11 releases use a `semantic versioning
 <http://semver.org>`_ policy.
 
+v2.6.1 (Nov 11, 2020)
+---------------------
+
+* ``py::exec``, ``py::eval``, and ``py::eval_file`` now add the builtins module
+  as ``"__builtins__"`` to their ``globals`` argument, better matching ``exec``
+  and ``eval`` in pure Python.
+  `#2616 <https://github.com/pybind/pybind11/pull/2616>`_
+
+* ``setup_helpers`` will no longer set a minimum macOS version lower than the
+  current version.
+  `#2622 <https://github.com/pybind/pybind11/pull/2622>`_
+
+* Allow deleting static properties.
+  `#2629 <https://github.com/pybind/pybind11/pull/2629>`_
+
+* Seal a leak in ``def_buffer``, cleaning up the ``capture`` object after the
+  ``class_`` object goes out of scope.
+  `#2634 <https://github.com/pybind/pybind11/pull/2634>`_
+
+* ``pybind11_INCLUDE_DIRS`` was incorrect, potentially causing a regression if
+  it was expected to include ``PYTHON_INCLUDE_DIRS`` (please use targets
+  instead).
+  `#2636 <https://github.com/pybind/pybind11/pull/2636>`_
+
+* Added parameter names to the ``py::enum_`` constructor and methods, avoiding
+  ``arg0`` in the generated docstrings.
+  `#2637 <https://github.com/pybind/pybind11/pull/2637>`_
+
+* Added ``needs_recompile`` optional function to the ``ParallelCompile``
+  helper, to allow a recompile to be skipped based on a user-defined function.
+  `#2643 <https://github.com/pybind/pybind11/pull/2643>`_
+
+
+v2.6.0 (Oct 21, 2020)
+---------------------
+
+See :ref:`upgrade-guide-2.6` for help upgrading to the new version.
+
+New features:
+
+* Keyword-only arguments supported in Python 2 or 3 with ``py::kw_only()``.
+  `#2100 <https://github.com/pybind/pybind11/pull/2100>`_
+
+* Positional-only arguments supported in Python 2 or 3 with ``py::pos_only()``.
+  `#2459 <https://github.com/pybind/pybind11/pull/2459>`_
+
+* ``py::is_final()`` class modifier to block subclassing (CPython only).
+  `#2151 <https://github.com/pybind/pybind11/pull/2151>`_
+
+* Added ``py::prepend()``, allowing a function to be placed at the beginning of
+  the overload chain.
+  `#1131 <https://github.com/pybind/pybind11/pull/1131>`_
+
+* Access to the type object now provided with ``py::type::of<T>()`` and
+  ``py::type::of(h)``.
+  `#2364 <https://github.com/pybind/pybind11/pull/2364>`_
+
+* Perfect forwarding support for methods.
+  `#2048 <https://github.com/pybind/pybind11/pull/2048>`_
+
+* Added ``py::error_already_set::discard_as_unraisable()``.
+  `#2372 <https://github.com/pybind/pybind11/pull/2372>`_
+
+* ``py::hash`` is now public.
+  `#2217 <https://github.com/pybind/pybind11/pull/2217>`_
+
+* ``py::class_<union_type>`` is now supported. Note that writing to one data
+  member of the union and reading another (type punning) is UB in C++. Thus
+  pybind11-bound unions should never be used for such conversions.
+  `#2320 <https://github.com/pybind/pybind11/pull/2320>`_.
+
+* Classes now check local scope when registering members, allowing a subclass
+  to have a member with the same name as a parent (such as an enum).
+  `#2335 <https://github.com/pybind/pybind11/pull/2335>`_
+
+Code correctness features:
+
+* Error now thrown when ``__init__`` is forgotten on subclasses.
+  `#2152 <https://github.com/pybind/pybind11/pull/2152>`_
+
+* Throw an error on conversion to a pybind11 type if the Python object isn't a
+  valid instance of that type, such as ``py::bytes(o)`` when ``py::object o``
+  isn't a bytes instance.
+  `#2349 <https://github.com/pybind/pybind11/pull/2349>`_
+
+* Throw if conversion to ``str`` fails.
+  `#2477 <https://github.com/pybind/pybind11/pull/2477>`_
+
+
+API changes:
+
+* ``py::module`` was renamed ``py::module_`` to avoid issues with C++20 when
+  used unqualified, but an alias ``py::module`` is provided for backward
+  compatibility.
+  `#2489 <https://github.com/pybind/pybind11/pull/2489>`_
+
+* Public constructors for ``py::module_`` have been deprecated; please use
+  ``pybind11::module_::create_extension_module`` if you were using the public
+  constructor (fairly rare after ``PYBIND11_MODULE`` was introduced).
+  `#2552 <https://github.com/pybind/pybind11/pull/2552>`_
+
+* ``PYBIND11_OVERLOAD*`` macros and ``get_overload`` function replaced by
+  correctly-named ``PYBIND11_OVERRIDE*`` and ``get_override``, fixing
+  inconsistencies in the presence of a closing ``;`` in these macros.
+  ``get_type_overload`` is deprecated.
+  `#2325 <https://github.com/pybind/pybind11/pull/2325>`_
+
+Packaging / building improvements:
+
+* The Python package was reworked to be more powerful and useful.
+  `#2433 <https://github.com/pybind/pybind11/pull/2433>`_
+
+  * :ref:`build-setuptools` is easier thanks to a new
+    ``pybind11.setup_helpers`` module, which provides utilities to use
+    setuptools with pybind11. It can be used via PEP 518, ``setup_requires``,
+    or by directly importing or copying ``setup_helpers.py`` into your project.
+
+  * CMake configuration files are now included in the Python package. Use
+    ``pybind11.get_cmake_dir()`` or ``python -m pybind11 --cmakedir`` to get
+    the directory with the CMake configuration files, or include the
+    site-packages location in your ``CMAKE_MODULE_PATH``. Or you can use the
+    new ``pybind11[global]`` extra when you install ``pybind11``, which
+    installs the CMake files and headers into your base environment in the
+    standard location.
+
+  * ``pybind11-config`` is another way to write ``python -m pybind11`` if you
+    have your PATH set up.
+
+  * Added external typing support to the helper module, code from
+    ``import pybind11`` can now be type checked.
+    `#2588 <https://github.com/pybind/pybind11/pull/2588>`_
+
+* Minimum CMake required increased to 3.4.
+  `#2338 <https://github.com/pybind/pybind11/pull/2338>`_ and
+  `#2370 <https://github.com/pybind/pybind11/pull/2370>`_
+
+  * Full integration with CMake’s C++ standard system and compile features
+    replaces ``PYBIND11_CPP_STANDARD``.
+
+  * Generated config file is now portable to different Python/compiler/CMake
+    versions.
+
+  * Virtual environments prioritized if ``PYTHON_EXECUTABLE`` is not set
+    (``venv``, ``virtualenv``, and ``conda``) (similar to the new FindPython
+    mode).
+
+  * Other CMake features now natively supported, like
+    ``CMAKE_INTERPROCEDURAL_OPTIMIZATION``, ``set(CMAKE_CXX_VISIBILITY_PRESET
+    hidden)``.
+
+  * ``CUDA`` as a language is now supported.
+
+  * Helper functions ``pybind11_strip``, ``pybind11_extension``,
+    ``pybind11_find_import`` added, see :doc:`cmake/index`.
+
+  * Optional :ref:`find-python-mode` and :ref:`nopython-mode` with CMake.
+    `#2370 <https://github.com/pybind/pybind11/pull/2370>`_
+
+* Uninstall target added.
+  `#2265 <https://github.com/pybind/pybind11/pull/2265>`_ and
+  `#2346 <https://github.com/pybind/pybind11/pull/2346>`_
+
+* ``pybind11_add_module()`` now accepts an optional ``OPT_SIZE`` flag that
+  switches the binding target to size-based optimization if the global build
+  type can not always be fixed to ``MinSizeRel`` (except in debug mode, where
+  optimizations remain disabled).  ``MinSizeRel`` or this flag reduces binary
+  size quite substantially (~25% on some platforms).
+  `#2463 <https://github.com/pybind/pybind11/pull/2463>`_
+
+Smaller or developer focused features and fixes:
+
+* Moved ``mkdoc.py`` to a new repo, `pybind11-mkdoc`_. There are no longer
+  submodules in the main repo.
+
+* ``py::memoryview`` segfault fix and update, with new
+  ``py::memoryview::from_memory`` in Python 3, and documentation.
+  `#2223 <https://github.com/pybind/pybind11/pull/2223>`_
+
+* Fix for ``buffer_info`` on Python 2.
+  `#2503 <https://github.com/pybind/pybind11/pull/2503>`_
+
+* If ``__eq__`` defined but not ``__hash__``, ``__hash__`` is now set to
+  ``None``.
+  `#2291 <https://github.com/pybind/pybind11/pull/2291>`_
+
+* ``py::ellipsis`` now also works on Python 2.
+  `#2360 <https://github.com/pybind/pybind11/pull/2360>`_
+
+* Pointer to ``std::tuple`` & ``std::pair`` supported in cast.
+  `#2334 <https://github.com/pybind/pybind11/pull/2334>`_
+
+* Small fixes in NumPy support. ``py::array`` now uses ``py::ssize_t`` as first
+  argument type.
+  `#2293 <https://github.com/pybind/pybind11/pull/2293>`_
+
+* Added missing signature for ``py::array``.
+  `#2363 <https://github.com/pybind/pybind11/pull/2363>`_
+
+* ``unchecked_mutable_reference`` has access to operator ``()`` and ``[]`` when
+  const.
+  `#2514 <https://github.com/pybind/pybind11/pull/2514>`_
+
+* ``py::vectorize`` is now supported on functions that return void.
+  `#1969 <https://github.com/pybind/pybind11/pull/1969>`_
+
+* ``py::capsule`` supports ``get_pointer`` and ``set_pointer``.
+  `#1131 <https://github.com/pybind/pybind11/pull/1131>`_
+
+* Fix crash when different instances share the same pointer of the same type.
+  `#2252 <https://github.com/pybind/pybind11/pull/2252>`_
+
+* Fix for ``py::len`` not clearing Python's error state when it fails and throws.
+  `#2575 <https://github.com/pybind/pybind11/pull/2575>`_
+
+* Bugfixes related to more extensive testing, new GitHub Actions CI.
+  `#2321 <https://github.com/pybind/pybind11/pull/2321>`_
+
+* Fixed a timezone bug at midnight in the Eastern hemisphere.
+  `#2438 <https://github.com/pybind/pybind11/pull/2438>`_
+
+* ``std::chrono::time_point`` now works when the resolution is not the same as
+  the system.
+  `#2481 <https://github.com/pybind/pybind11/pull/2481>`_
+
+* Bug fixed where ``py::array_t`` could accept arrays that did not match the
+  requested ordering.
+  `#2484 <https://github.com/pybind/pybind11/pull/2484>`_
+
+* Avoid a segfault on some compilers when types are removed in Python.
+  `#2564 <https://github.com/pybind/pybind11/pull/2564>`_
+
+* ``py::arg::none()`` is now also respected when passing keyword arguments.
+  `#2611 <https://github.com/pybind/pybind11/pull/2611>`_
+
+* PyPy fixes, PyPy 7.3.x now supported, including PyPy3. (Known issue with
+  PyPy2 and Windows `#2596 <https://github.com/pybind/pybind11/issues/2596>`_).
+  `#2146 <https://github.com/pybind/pybind11/pull/2146>`_
+
+* CPython 3.9.0 workaround for undefined behavior (macOS segfault).
+  `#2576 <https://github.com/pybind/pybind11/pull/2576>`_
+
+* CPython 3.9 warning fixes.
+  `#2253 <https://github.com/pybind/pybind11/pull/2253>`_
+
+* Improved C++20 support, now tested in CI.
+  `#2489 <https://github.com/pybind/pybind11/pull/2489>`_
+  `#2599 <https://github.com/pybind/pybind11/pull/2599>`_
+
+* Improved but still incomplete debug Python interpreter support.
+  `#2025 <https://github.com/pybind/pybind11/pull/2025>`_
+
+* NVCC (CUDA 11) now supported and tested in CI.
+  `#2461 <https://github.com/pybind/pybind11/pull/2461>`_
+
+* NVIDIA PGI compilers now supported and tested in CI.
+  `#2475 <https://github.com/pybind/pybind11/pull/2475>`_
+
+* At least Intel 18 now explicitly required when compiling with Intel.
+  `#2577 <https://github.com/pybind/pybind11/pull/2577>`_
+
+* Extensive style checking in CI, with `pre-commit`_ support. Code
+  modernization, checked by clang-tidy.
+
+* Expanded docs, including new main page, new installing section, and CMake
+  helpers page, along with over a dozen new sections on existing pages.
+
+* In GitHub, new docs for contributing and new issue templates.
+
+.. _pre-commit: https://pre-commit.com
+
+.. _pybind11-mkdoc: https://github.com/pybind/pybind11-mkdoc
+
 v2.5.0 (Mar 31, 2020)
 -----------------------------------------------------
 
@@ -361,7 +633,7 @@
 v2.2.1 (September 14, 2017)
 -----------------------------------------------------
 
-* Added ``py::module::reload()`` member function for reloading a module.
+* Added ``py::module_::reload()`` member function for reloading a module.
   `#1040 <https://github.com/pybind/pybind11/pull/1040>`_.
 
 * Fixed a reference leak in the number converter.
@@ -536,7 +808,7 @@
   in reference cycles.
   `#856 <https://github.com/pybind/pybind11/pull/856>`_.
 
-* Numpy and buffer protocol related improvements:
+* NumPy and buffer protocol related improvements:
 
   1. Support for negative strides in Python buffer objects/numpy arrays. This
      required changing integers from unsigned to signed for the related C++ APIs.
@@ -1267,7 +1539,7 @@
 * Improved support for ``std::shared_ptr<>`` conversions
 * Initial support for ``std::set<>`` conversions
 * Fixed type resolution issue for types defined in a separate plugin module
-* Cmake build system improvements
+* CMake build system improvements
 * Factored out generic functionality to non-templated code (smaller code size)
 * Added a code size / compile time benchmark vs Boost.Python
 * Added an appveyor CI script
diff --git a/docs/classes.rst b/docs/classes.rst
index a63f6a1..f3610ef 100644
--- a/docs/classes.rst
+++ b/docs/classes.rst
@@ -74,7 +74,7 @@
     >>> print(p)
     <example.Pet object at 0x10cd98060>
 
-To address this, we could bind an utility function that returns a human-readable
+To address this, we could bind a utility function that returns a human-readable
 summary to the special method slot named ``__repr__``. Unfortunately, there is no
 suitable functionality in the ``Pet`` data structure, and it would be nice if
 we did not have to change it. This can easily be accomplished by binding a
@@ -373,8 +373,8 @@
 
     py::class_<Pet>(m, "Pet")
        .def(py::init<const std::string &, int>())
-       .def("set", (void (Pet::*)(int)) &Pet::set, "Set the pet's age")
-       .def("set", (void (Pet::*)(const std::string &)) &Pet::set, "Set the pet's name");
+       .def("set", static_cast<void (Pet::*)(int)>(&Pet::set), "Set the pet's age")
+       .def("set", static_cast<void (Pet::*)(const std::string &)>(&Pet::set), "Set the pet's name");
 
 The overload signatures are also visible in the method's docstring:
 
diff --git a/docs/cmake/index.rst b/docs/cmake/index.rst
new file mode 100644
index 0000000..eaf66d7
--- /dev/null
+++ b/docs/cmake/index.rst
@@ -0,0 +1,8 @@
+CMake helpers
+-------------
+
+Pybind11 can be used with ``add_subdirectory(extern/pybind11)``, or from an
+install with ``find_package(pybind11 CONFIG)``. The interface provided in
+either case is functionally identical.
+
+.. cmake-module:: ../../tools/pybind11Config.cmake.in
diff --git a/docs/compiling.rst b/docs/compiling.rst
index c50c7d8..f26e6cf 100644
--- a/docs/compiling.rst
+++ b/docs/compiling.rst
@@ -3,6 +3,8 @@
 Build systems
 #############
 
+.. _build-setuptools:
+
 Building with setuptools
 ========================
 
@@ -13,6 +15,203 @@
 
 .. [python_example] https://github.com/pybind/python_example
 
+A helper file is provided with pybind11 that can simplify usage with setuptools.
+
+To use pybind11 inside your ``setup.py``, you have to have some system to
+ensure that ``pybind11`` is installed when you build your package. There are
+four possible ways to do this, and pybind11 supports all four: You can ask all
+users to install pybind11 beforehand (bad), you can use
+:ref:`setup_helpers-pep518` (good, but very new and requires Pip 10),
+:ref:`setup_helpers-setup_requires` (discouraged by Python packagers now that
+PEP 518 is available, but it still works everywhere), or you can
+:ref:`setup_helpers-copy-manually` (always works but you have to manually sync
+your copy to get updates).
+
+An example of a ``setup.py`` using pybind11's helpers:
+
+.. code-block:: python
+
+    from glob import glob
+    from setuptools import setup
+    from pybind11.setup_helpers import Pybind11Extension
+
+    ext_modules = [
+        Pybind11Extension(
+            "python_example",
+            sorted(glob("src/*.cpp")),  # Sort source files for reproducibility
+        ),
+    ]
+
+    setup(
+        ...,
+        ext_modules=ext_modules
+    )
+
+If you want to do an automatic search for the highest supported C++ standard,
+that is supported via a ``build_ext`` command override; it will only affect
+``Pybind11Extensions``:
+
+.. code-block:: python
+
+    from glob import glob
+    from setuptools import setup
+    from pybind11.setup_helpers import Pybind11Extension, build_ext
+
+    ext_modules = [
+        Pybind11Extension(
+            "python_example",
+            sorted(glob("src/*.cpp")),
+        ),
+    ]
+
+    setup(
+        ...,
+        cmdclass={"build_ext": build_ext},
+        ext_modules=ext_modules
+    )
+
+Since pybind11 does not require NumPy when building, a light-weight replacement
+for NumPy's parallel compilation distutils tool is included. Use it like this:
+
+.. code-block:: python
+
+    from pybind11.setup_helpers import ParallelCompile
+
+    # Optional multithreaded build
+    ParallelCompile("NPY_NUM_BUILD_JOBS").install()
+
+    setup(...)
+
+The argument is the name of an environment variable to control the number of
+threads, such as ``NPY_NUM_BUILD_JOBS`` (as used by NumPy), though you can set
+something different if you want. You can also pass ``default=N`` to set the
+default number of threads (0 will take the number of threads available) and
+``max=N``, the maximum number of threads; if you have a large extension you may
+want to set this to a memory-dependent number.
+
+If you are developing rapidly and have a lot of C++ files, you may want to
+avoid rebuilding files that have not changed. For simple cases where you are
+using ``pip install -e .`` and do not have local headers, you can skip the
+rebuild if an object file is newer than its source (headers are not checked!)
+with the following:
+
+.. code-block:: python
+
+    from pybind11.setup_helpers import ParallelCompile, naive_recompile
+
+    ParallelCompile("NPY_NUM_BUILD_JOBS", needs_recompile=naive_recompile).install()
+
+
+If you have a more complex build, you can implement a smarter function and pass
+it to ``needs_recompile``, or you can use [Ccache]_ instead. ``CXX="ccache g++"
+pip install -e .`` would be the way to use it with GCC, for example. Unlike the
+simple solution, this works even when not compiling in editable mode, but
+it does require Ccache to be installed.
+
+Keep in mind that Pip will not even attempt to rebuild if it thinks it has
+already built a copy of your code, which it deduces from the version number.
+One way to avoid this is to use [setuptools_scm]_, which will generate a
+version number that includes the number of commits since your last tag and a
+hash for a dirty directory. Another way to force a rebuild is to purge your cache
+or use Pip's ``--no-cache-dir`` option.
+
+.. [Ccache] https://ccache.dev
+
+.. [setuptools_scm] https://github.com/pypa/setuptools_scm
+
+.. _setup_helpers-pep518:
+
+PEP 518 requirements (Pip 10+ required)
+---------------------------------------
+
+If you use `PEP 518's <https://www.python.org/dev/peps/pep-0518/>`_
+``pyproject.toml`` file, you can ensure that ``pybind11`` is available during
+the compilation of your project.  When this file exists, Pip will make a new
+virtual environment, download just the packages listed here in ``requires=``,
+and build a wheel (binary Python package). It will then throw away the
+environment, and install your wheel.
+
+Your ``pyproject.toml`` file will likely look something like this:
+
+.. code-block:: toml
+
+    [build-system]
+    requires = ["setuptools", "wheel", "pybind11==2.6.0"]
+    build-backend = "setuptools.build_meta"
+
+.. note::
+
+    The main drawback to this method is that a `PEP 517`_ compliant build tool,
+    such as Pip 10+, is required for this approach to work; older versions of
+    Pip completely ignore this file. If you distribute binaries (called wheels
+    in Python) using something like `cibuildwheel`_, remember that ``setup.py``
+    and ``pyproject.toml`` are not even contained in the wheel, so this high
+    Pip requirement is only for source builds, and will not affect users of
+    your binary wheels.
+
+.. _PEP 517: https://www.python.org/dev/peps/pep-0517/
+.. _cibuildwheel: https://cibuildwheel.readthedocs.io
+
+.. _setup_helpers-setup_requires:
+
+Classic ``setup_requires``
+--------------------------
+
+If you want to support old versions of Pip with the classic
+``setup_requires=["pybind11"]`` keyword argument to setup, which triggers a
+two-phase ``setup.py`` run, then you will need to use something like this to
+ensure the first pass works (which has not yet installed the ``setup_requires``
+packages, since it can't install something it does not know about):
+
+.. code-block:: python
+
+    try:
+        from pybind11.setup_helpers import Pybind11Extension
+    except ImportError:
+        from setuptools import Extension as Pybind11Extension
+
+
+It doesn't matter that the Extension class is not the enhanced subclass for the
+first pass run; and the second pass will have the ``setup_requires``
+requirements.
+
+This is obviously more of a hack than the PEP 518 method, but it supports
+ancient versions of Pip.
+
+.. _setup_helpers-copy-manually:
+
+Copy manually
+-------------
+
+You can also copy ``setup_helpers.py`` directly to your project; it was
+designed to be usable standalone, like the old example ``setup.py``. You can
+set ``include_pybind11=False`` to skip including the pybind11 package headers,
+so you can use it with git submodules and a specific git version. If you use
+this, you will need to import from a local file in ``setup.py`` and ensure the
+helper file is part of your MANIFEST.
+
+
+Closely related, if you include pybind11 as a subproject, you can run the
+``setup_helpers.py`` in place. If loaded correctly, this should even pick up
+the correct include for pybind11, though you can turn it off as shown above if
+you want to input it manually.
+
+Suggested usage if you have pybind11 as a submodule in ``extern/pybind11``:
+
+.. code-block:: python
+
+    DIR = os.path.abspath(os.path.dirname(__file__))
+
+    sys.path.append(os.path.join(DIR, "extern", "pybind11"))
+    from pybind11.setup_helpers import Pybind11Extension  # noqa: E402
+
+    del sys.path[-1]
+
+
+.. versionchanged:: 2.6
+
+    Added ``setup_helpers`` file.
+
 Building with cppimport
 ========================
 
@@ -33,8 +232,8 @@
 
 .. code-block:: cmake
 
-    cmake_minimum_required(VERSION 2.8.12)
-    project(example)
+    cmake_minimum_required(VERSION 3.4...3.18)
+    project(example LANGUAGES CXX)
 
     add_subdirectory(pybind11)
     pybind11_add_module(example example.cpp)
@@ -50,6 +249,11 @@
 
 .. [cmake_example] https://github.com/pybind/cmake_example
 
+.. versionchanged:: 2.6
+   CMake 3.4+ is required.
+
+Further information can be found at :doc:`cmake/index`.
+
 pybind11_add_module
 -------------------
 
@@ -59,7 +263,7 @@
 .. code-block:: cmake
 
     pybind11_add_module(<name> [MODULE | SHARED] [EXCLUDE_FROM_ALL]
-                        [NO_EXTRAS] [SYSTEM] [THIN_LTO] source1 [source2 ...])
+                        [NO_EXTRAS] [THIN_LTO] [OPT_SIZE] source1 [source2 ...])
 
 This function behaves very much like CMake's builtin ``add_library`` (in fact,
 it's a wrapper function around that command). It will add a library target
@@ -86,53 +290,68 @@
 given, they will always be disabled, even in ``Release`` mode. However, this
 will result in code bloat and is generally not recommended.
 
-By default, pybind11 and Python headers will be included with ``-I``. In order
-to include pybind11 as system library, e.g. to avoid warnings in downstream
-code with warn-levels outside of pybind11's scope, set the option ``SYSTEM``.
-
 As stated above, LTO is enabled by default. Some newer compilers also support
 different flavors of LTO such as `ThinLTO`_. Setting ``THIN_LTO`` will cause
 the function to prefer this flavor if available. The function falls back to
-regular LTO if ``-flto=thin`` is not available.
+regular LTO if ``-flto=thin`` is not available. If
+``CMAKE_INTERPROCEDURAL_OPTIMIZATION`` is set (either ``ON`` or ``OFF``), then
+that will be respected instead of the built-in flag search.
+
+.. note::
+
+   If you want to set the property form on targets or the
+   ``CMAKE_INTERPROCEDURAL_OPTIMIZATION_<CONFIG>`` versions of this, you should
+   still use ``set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF)`` (otherwise a
+   no-op) to disable pybind11's ipo flags.
+
+The ``OPT_SIZE`` flag enables size-based optimization equivalent to the
+standard ``/Os`` or ``-Os`` compiler flags and the ``MinSizeRel`` build type,
+which avoid optimizations that can substantially increase the size of the
+resulting binary. This flag is particularly useful in projects that are split
+into performance-critical parts and associated bindings. In this case, we can
+compile the project in release mode (and hence, optimize performance globally),
+and specify ``OPT_SIZE`` for the binding target, where size might be the main
+concern as performance is often less critical here. A ~25% size reduction has
+been observed in practice. This flag only changes the optimization behavior at
+a per-target level and takes precedence over the global CMake build type
+(``Release``, ``RelWithDebInfo``) except for ``Debug`` builds, where
+optimizations remain disabled.
 
 .. _ThinLTO: http://clang.llvm.org/docs/ThinLTO.html
 
 Configuration variables
 -----------------------
 
-By default, pybind11 will compile modules with the C++14 standard, if available
-on the target compiler, falling back to C++11 if C++14 support is not
-available.  Note, however, that this default is subject to change: future
-pybind11 releases are expected to migrate to newer C++ standards as they become
-available.  To override this, the standard flag can be given explicitly in
-``PYBIND11_CPP_STANDARD``:
+By default, pybind11 will compile modules with the compiler default or the
+minimum standard required by pybind11, whichever is higher.  You can set the
+standard explicitly with
+`CMAKE_CXX_STANDARD <https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_STANDARD.html>`_:
 
 .. code-block:: cmake
 
-    # Use just one of these:
-    # GCC/clang:
-    set(PYBIND11_CPP_STANDARD -std=c++11)
-    set(PYBIND11_CPP_STANDARD -std=c++14)
-    set(PYBIND11_CPP_STANDARD -std=c++1z) # Experimental C++17 support
-    # MSVC:
-    set(PYBIND11_CPP_STANDARD /std:c++14)
-    set(PYBIND11_CPP_STANDARD /std:c++latest) # Enables some MSVC C++17 features
+    set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ version selection")  # or 11, 14, 17, 20
+    set(CMAKE_CXX_STANDARD_REQUIRED ON)  # optional, ensure standard is supported
+    set(CMAKE_CXX_EXTENSIONS OFF)  # optional, keep compiler extensions off
 
-    add_subdirectory(pybind11)  # or find_package(pybind11)
+The variables can also be set when calling CMake from the command line using
+the ``-D<variable>=<value>`` flag. You can also manually set ``CXX_STANDARD``
+on a target or use ``target_compile_features`` on your targets - anything that
+CMake supports.
 
-Note that this and all other configuration variables must be set **before** the
-call to ``add_subdirectory`` or ``find_package``. The variables can also be set
-when calling CMake from the command line using the ``-D<variable>=<value>`` flag.
-
-The target Python version can be selected by setting ``PYBIND11_PYTHON_VERSION``
-or an exact Python installation can be specified with ``PYTHON_EXECUTABLE``.
-For example:
+Classic Python support: The target Python version can be selected by setting
+``PYBIND11_PYTHON_VERSION`` or an exact Python installation can be specified
+with ``PYTHON_EXECUTABLE``.  For example:
 
 .. code-block:: bash
 
     cmake -DPYBIND11_PYTHON_VERSION=3.6 ..
-    # or
-    cmake -DPYTHON_EXECUTABLE=path/to/python ..
+
+    # Another method:
+    cmake -DPYTHON_EXECUTABLE=/path/to/python ..
+
+    # This often is a good way to get the current Python, works in environments:
+    cmake -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") ..
+
 
 find_package vs. add_subdirectory
 ---------------------------------
@@ -143,8 +362,8 @@
 
 .. code-block:: cmake
 
-    cmake_minimum_required(VERSION 2.8.12)
-    project(example)
+    cmake_minimum_required(VERSION 3.4...3.18)
+    project(example LANGUAGES CXX)
 
     find_package(pybind11 REQUIRED)
     pybind11_add_module(example example.cpp)
@@ -155,12 +374,19 @@
 
 .. code-block:: bash
 
+    # Classic CMake
     cd pybind11
     mkdir build
     cd build
     cmake ..
     make install
 
+    # CMake 3.15+
+    cd pybind11
+    cmake -S . -B build
+    cmake --build build -j 2  # Build on 2 cores
+    cmake --install build
+
 Once detected, the aforementioned ``pybind11_add_module`` can be employed as
 before. The function usage and configuration variables are identical no matter
 if pybind11 is added as a subdirectory or found as an installed package. You
@@ -169,41 +395,134 @@
 
 .. _Config file: https://github.com/pybind/pybind11/blob/master/tools/pybind11Config.cmake.in
 
-Advanced: interface library target
-----------------------------------
 
-When using a version of CMake greater than 3.0, pybind11 can additionally
-be used as a special *interface library* . The target ``pybind11::module``
-is available with pybind11 headers, Python headers and libraries as needed,
-and C++ compile definitions attached. This target is suitable for linking
-to an independently constructed (through ``add_library``, not
-``pybind11_add_module``) target in the consuming project.
+.. _find-python-mode:
+
+FindPython mode
+---------------
+
+CMake 3.12+ (3.15+ recommended) added a new module called FindPython that had a
+highly improved search algorithm and modern targets and tools. If you use
+FindPython, pybind11 will detect this and use the existing targets instead:
 
 .. code-block:: cmake
 
-    cmake_minimum_required(VERSION 3.0)
-    project(example)
+    cmake_minimum_required(VERSION 3.15...3.18)
+    project(example LANGUAGES CXX)
+
+    find_package(Python COMPONENTS Interpreter Development REQUIRED)
+    find_package(pybind11 CONFIG REQUIRED)
+    # or add_subdirectory(pybind11)
+
+    pybind11_add_module(example example.cpp)
+
+You can also use the targets (as listed below) with FindPython. If you define
+``PYBIND11_FINDPYTHON``, pybind11 will perform the FindPython step for you
+(mostly useful when building pybind11's own tests, or as a way to change search
+algorithms from the CMake invocation, with ``-DPYBIND11_FINDPYTHON=ON``).
+
+.. warning::
+
+    If you use FindPython2 and FindPython3 to dual-target Python, use the
+    individual targets listed below, and avoid targets that directly include
+    Python parts.
+
+There are `many ways to hint or force a discovery of a specific Python
+installation <https://cmake.org/cmake/help/latest/module/FindPython.html>`_;
+setting ``Python_ROOT_DIR`` may be the most common one (though with
+virtualenv/venv support, and Conda support, this tends to find the correct
+Python version more often than the old system did).
+
+.. versionadded:: 2.6
+
+Advanced: interface library targets
+-----------------------------------
+
+Pybind11 supports modern CMake usage patterns with a set of interface targets,
+available in all modes. The targets provided are:
+
+   ``pybind11::headers``
+     Just the pybind11 headers and minimum compile requirements
+
+   ``pybind11::python2_no_register``
+     Quiets the warning/error when mixing C++14 or higher and Python 2
+
+   ``pybind11::pybind11``
+     Python headers + ``pybind11::headers`` + ``pybind11::python2_no_register`` (Python 2 only)
+
+   ``pybind11::python_link_helper``
+     Just the "linking" part of ``pybind11::module``
+
+   ``pybind11::module``
+     Everything for extension modules - ``pybind11::pybind11`` + ``Python::Module`` (FindPython CMake 3.15+) or ``pybind11::python_link_helper``
+
+   ``pybind11::embed``
+     Everything for embedding the Python interpreter - ``pybind11::pybind11`` + ``Python::Embed`` (FindPython) or Python libs
+
+   ``pybind11::lto`` / ``pybind11::thin_lto``
+     An alternative to ``INTERPROCEDURAL_OPTIMIZATION`` for adding link-time optimization.
+
+   ``pybind11::windows_extras``
+     ``/bigobj`` and ``/MP`` for MSVC.
+
+   ``pybind11::opt_size``
+     ``/Os`` for MSVC, ``-Os`` for other compilers. Does nothing for debug builds.
+
+Two helper functions are also provided:
+
+    ``pybind11_strip(target)``
+      Strips a target (uses ``CMAKE_STRIP`` after the target is built)
+
+    ``pybind11_extension(target)``
+      Sets the correct extension (with SOABI) for a target.
+
+You can use these targets to build complex applications. For example, the
+``pybind11_add_module`` function is identical to:
+
+.. code-block:: cmake
+
+    cmake_minimum_required(VERSION 3.4)
+    project(example LANGUAGES CXX)
 
     find_package(pybind11 REQUIRED)  # or add_subdirectory(pybind11)
 
     add_library(example MODULE main.cpp)
-    target_link_libraries(example PRIVATE pybind11::module)
-    set_target_properties(example PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}"
-                                             SUFFIX "${PYTHON_MODULE_EXTENSION}")
+
+    target_link_libraries(example PRIVATE pybind11::module pybind11::lto pybind11::windows_extras)
+
+    pybind11_extension(example)
+    pybind11_strip(example)
+
+    set_target_properties(example PROPERTIES CXX_VISIBILITY_PRESET "hidden"
+                                             CUDA_VISIBILITY_PRESET "hidden")
+
+Instead of setting properties, you can set ``CMAKE_*`` variables to initialize these correctly.
 
 .. warning::
 
     Since pybind11 is a metatemplate library, it is crucial that certain
     compiler flags are provided to ensure high quality code generation. In
     contrast to the ``pybind11_add_module()`` command, the CMake interface
-    library only provides the *minimal* set of parameters to ensure that the
-    code using pybind11 compiles, but it does **not** pass these extra compiler
-    flags (i.e. this is up to you).
+    provides a *composable* set of targets to ensure that you retain flexibility.
+    It can be especially important to provide or set these properties; the
+    :ref:`FAQ <faq:symhidden>` contains an explanation on why these are needed.
 
-    These include Link Time Optimization (``-flto`` on GCC/Clang/ICPC, ``/GL``
-    and ``/LTCG`` on Visual Studio) and .OBJ files with many sections on Visual
-    Studio (``/bigobj``).  The :ref:`FAQ <faq:symhidden>` contains an
-    explanation on why these are needed.
+.. versionadded:: 2.6
+
+.. _nopython-mode:
+
+Advanced: NOPYTHON mode
+-----------------------
+
+If you want complete control, you can set ``PYBIND11_NOPYTHON`` to completely
+disable Python integration (this also happens if you run ``FindPython2`` and
+``FindPython3`` without running ``FindPython``). This gives you complete
+freedom to integrate into an existing system (like `Scikit-Build's
+<https://scikit-build.readthedocs.io>`_ ``PythonExtensions``).
+``pybind11_add_module`` and ``pybind11_extension`` will be unavailable, and the
+targets will be missing any Python specific behavior.
+
+.. versionadded:: 2.6
 
 Embedding the Python interpreter
 --------------------------------
@@ -217,8 +536,8 @@
 
 .. code-block:: cmake
 
-    cmake_minimum_required(VERSION 3.0)
-    project(example)
+    cmake_minimum_required(VERSION 3.4...3.18)
+    project(example LANGUAGES CXX)
 
     find_package(pybind11 REQUIRED)  # or add_subdirectory(pybind11)
 
@@ -255,7 +574,7 @@
 on the distribution; in the latter case, the module extension can be manually
 set to ``.so``.
 
-On Mac OS: the build command is almost the same but it also requires passing
+On macOS: the build command is almost the same but it also requires passing
 the ``-undefined dynamic_lookup`` flag so as to ignore missing symbols when
 building the module:
 
@@ -279,6 +598,13 @@
     of possibly importing a second Python library into a process that already
     contains one (which will lead to a segfault).
 
+
+Building with Bazel
+===================
+
+You can build with the Bazel build system using the `pybind11_bazel
+<https://github.com/pybind/pybind11_bazel>`_ repository.
+
 Generating binding code automatically
 =====================================
 
@@ -287,3 +613,18 @@
 [binder]_ documentation for details.
 
 .. [binder] http://cppbinder.readthedocs.io/en/latest/about.html
+
+[AutoWIG]_ is a Python library that wraps automatically compiled libraries into
+high-level languages. It parses C++ code using LLVM/Clang technologies and
+generates the wrappers using the Mako templating engine. The approach is automatic,
+extensible, and applies to very complex C++ libraries, composed of thousands of
+classes or incorporating modern meta-programming constructs.
+
+.. [AutoWIG] https://github.com/StatisKit/AutoWIG
+
+[robotpy-build]_ is a pure Python, cross-platform build tool that aims to
+simplify creation of python wheels for pybind11 projects, and provide
+cross-project dependency management. Additionally, it is able to autogenerate
+customizable pybind11-based wrappers by parsing C++ header files.
+
+.. [robotpy-build] https://robotpy-build.readthedocs.io
diff --git a/docs/conf.py b/docs/conf.py
index fa6332d..66db310 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -17,53 +17,65 @@
 import os
 import shlex
 import subprocess
+from pathlib import Path
+import re
+
+DIR = Path(__file__).parent.resolve()
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.insert(0, os.path.abspath('.'))
+# sys.path.insert(0, os.path.abspath('.'))
 
 # -- General configuration ------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
-#needs_sphinx = '1.0'
+# needs_sphinx = '1.0'
 
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = ['breathe']
+extensions = [
+    "breathe",
+    "sphinxcontrib.rsvgconverter",
+    "sphinxcontrib.moderncmakedomain",
+]
 
-breathe_projects = {'pybind11': '.build/doxygenxml/'}
-breathe_default_project = 'pybind11'
-breathe_domain_by_extension = {'h': 'cpp'}
+breathe_projects = {"pybind11": ".build/doxygenxml/"}
+breathe_default_project = "pybind11"
+breathe_domain_by_extension = {"h": "cpp"}
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['.templates']
+templates_path = [".templates"]
 
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 # source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+source_suffix = ".rst"
 
 # The encoding of source files.
-#source_encoding = 'utf-8-sig'
+# source_encoding = 'utf-8-sig'
 
 # The master toctree document.
-master_doc = 'index'
+master_doc = "index"
 
 # General information about the project.
-project = 'pybind11'
-copyright = '2017, Wenzel Jakob'
-author = 'Wenzel Jakob'
+project = "pybind11"
+copyright = "2017, Wenzel Jakob"
+author = "Wenzel Jakob"
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
-#
-# The short X.Y version.
-version = '2.5'
+
+# Read the listed version
+with open("../pybind11/_version.py") as f:
+    code = compile(f.read(), "../pybind11/_version.py", "exec")
+loc = {}
+exec(code, loc)
+
 # The full version, including alpha/beta/rc tags.
-release = '2.5.0'
+version = loc["__version__"]
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -74,37 +86,37 @@
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
-#today = ''
+# today = ''
 # Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+# today_fmt = '%B %d, %Y'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-exclude_patterns = ['.build', 'release.rst']
+exclude_patterns = [".build", "release.rst"]
 
 # The reST default role (used for this markup: `text`) to use for all
 # documents.
-default_role = 'any'
+default_role = "any"
 
 # If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
 
 # If true, the current module name will be prepended to all description
 # unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
 
 # If true, sectionauthor and moduleauthor directives will be shown in the
 # output. They are ignored by default.
-#show_authors = False
+# show_authors = False
 
 # The name of the Pygments (syntax highlighting) style to use.
-#pygments_style = 'monokai'
+# pygments_style = 'monokai'
 
 # A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
 
 # If true, keep warnings as "system message" paragraphs in the built documents.
-#keep_warnings = False
+# keep_warnings = False
 
 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = False
@@ -115,141 +127,148 @@
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 
-on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
+on_rtd = os.environ.get("READTHEDOCS", None) == "True"
 
 if not on_rtd:  # only import and set the theme if we're building docs locally
     import sphinx_rtd_theme
-    html_theme = 'sphinx_rtd_theme'
+
+    html_theme = "sphinx_rtd_theme"
     html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
 
-    html_context = {
-        'css_files': [
-            '_static/theme_overrides.css'
-        ]
-    }
+    html_context = {"css_files": ["_static/theme_overrides.css"]}
 else:
     html_context = {
-        'css_files': [
-            '//media.readthedocs.org/css/sphinx_rtd_theme.css',            
-            '//media.readthedocs.org/css/readthedocs-doc-embed.css',    
-            '_static/theme_overrides.css'
+        "css_files": [
+            "//media.readthedocs.org/css/sphinx_rtd_theme.css",
+            "//media.readthedocs.org/css/readthedocs-doc-embed.css",
+            "_static/theme_overrides.css",
         ]
     }
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
-#html_theme_options = {}
+# html_theme_options = {}
 
 # Add any paths that contain custom themes here, relative to this directory.
-#html_theme_path = []
+# html_theme_path = []
 
 # The name for this set of Sphinx documents.  If None, it defaults to
-# "<project> v<release> documentation".
-#html_title = None
+# "<project> v<version> documentation".
+# html_title = None
 
 # A shorter title for the navigation bar.  Default is the same as html_title.
-#html_short_title = None
+# html_short_title = None
 
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
-#html_logo = None
+# html_logo = None
 
 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-#html_favicon = None
+# html_favicon = None
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
 
 # Add any extra paths that contain custom files (such as robots.txt or
 # .htaccess) here, relative to this directory. These files are copied
 # directly to the root of the documentation.
-#html_extra_path = []
+# html_extra_path = []
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+# html_last_updated_fmt = '%b %d, %Y'
 
 # If true, SmartyPants will be used to convert quotes and dashes to
 # typographically correct entities.
-#html_use_smartypants = True
+# html_use_smartypants = True
 
 # Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+# html_sidebars = {}
 
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
-#html_additional_pages = {}
+# html_additional_pages = {}
 
 # If false, no module index is generated.
-#html_domain_indices = True
+# html_domain_indices = True
 
 # If false, no index is generated.
-#html_use_index = True
+# html_use_index = True
 
 # If true, the index is split into individual pages for each letter.
-#html_split_index = False
+# html_split_index = False
 
 # If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+# html_show_sourcelink = True
 
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
+# html_show_sphinx = True
 
 # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#html_show_copyright = True
+# html_show_copyright = True
 
 # If true, an OpenSearch description file will be output, and all pages will
 # contain a <link> tag referring to it.  The value of this option must be the
 # base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# html_use_opensearch = ''
 
 # This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
+# html_file_suffix = None
 
 # Language to be used for generating the HTML full-text search index.
 # Sphinx supports the following languages:
 #   'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
 #   'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr'
-#html_search_language = 'en'
+# html_search_language = 'en'
 
 # A dictionary with options for the search language support, empty by default.
 # Now only 'ja' uses this config value
-#html_search_options = {'type': 'default'}
+# html_search_options = {'type': 'default'}
 
 # The name of a javascript file (relative to the configuration directory) that
 # implements a search results scorer. If empty, the default will be used.
-#html_search_scorer = 'scorer.js'
+# html_search_scorer = 'scorer.js'
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'pybind11doc'
+htmlhelp_basename = "pybind11doc"
 
 # -- Options for LaTeX output ---------------------------------------------
 
 latex_elements = {
-# The paper size ('letterpaper' or 'a4paper').
-#'papersize': 'letterpaper',
+    # The paper size ('letterpaper' or 'a4paper').
+    # 'papersize': 'letterpaper',
+    #
+    # The font size ('10pt', '11pt' or '12pt').
+    # 'pointsize': '10pt',
+    #
+    # Additional stuff for the LaTeX preamble.
+    # remove blank pages (between the title page and the TOC, etc.)
+    "classoptions": ",openany,oneside",
+    "preamble": r"""
+\usepackage{fontawesome}
+\usepackage{textgreek}
+\DeclareUnicodeCharacter{00A0}{}
+\DeclareUnicodeCharacter{2194}{\faArrowsH}
+\DeclareUnicodeCharacter{1F382}{\faBirthdayCake}
+\DeclareUnicodeCharacter{1F355}{\faAdjust}
+\DeclareUnicodeCharacter{0301}{'}
+\DeclareUnicodeCharacter{03C0}{\textpi}
 
-# The font size ('10pt', '11pt' or '12pt').
-#'pointsize': '10pt',
-
-# Additional stuff for the LaTeX preamble.
-'preamble': '\DeclareUnicodeCharacter{00A0}{}',
-
-# Latex figure (float) alignment
-#'figure_align': 'htbp',
+""",
+    # Latex figure (float) alignment
+    # 'figure_align': 'htbp',
 }
 
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-  (master_doc, 'pybind11.tex', 'pybind11 Documentation',
-   'Wenzel Jakob', 'manual'),
+    (master_doc, "pybind11.tex", "pybind11 Documentation", "Wenzel Jakob", "manual"),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -258,32 +277,29 @@
 
 # For "manual" documents, if this is true, then toplevel headings are parts,
 # not chapters.
-#latex_use_parts = False
+# latex_use_parts = False
 
 # If true, show page references after internal links.
-#latex_show_pagerefs = False
+# latex_show_pagerefs = False
 
 # If true, show URL addresses after external links.
-#latex_show_urls = False
+# latex_show_urls = False
 
 # Documents to append as an appendix to all manuals.
-#latex_appendices = []
+# latex_appendices = []
 
 # If false, no module index is generated.
-#latex_domain_indices = True
+# latex_domain_indices = True
 
 
 # -- Options for manual page output ---------------------------------------
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'pybind11', 'pybind11 Documentation',
-     [author], 1)
-]
+man_pages = [(master_doc, "pybind11", "pybind11 Documentation", [author], 1)]
 
 # If true, show URL addresses after external links.
-#man_show_urls = False
+# man_show_urls = False
 
 
 # -- Options for Texinfo output -------------------------------------------
@@ -292,41 +308,70 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-  (master_doc, 'pybind11', 'pybind11 Documentation',
-   author, 'pybind11', 'One line description of project.',
-   'Miscellaneous'),
+    (
+        master_doc,
+        "pybind11",
+        "pybind11 Documentation",
+        author,
+        "pybind11",
+        "One line description of project.",
+        "Miscellaneous",
+    ),
 ]
 
 # Documents to append as an appendix to all manuals.
-#texinfo_appendices = []
+# texinfo_appendices = []
 
 # If false, no module index is generated.
-#texinfo_domain_indices = True
+# texinfo_domain_indices = True
 
 # How to display URL addresses: 'footnote', 'no', or 'inline'.
-#texinfo_show_urls = 'footnote'
+# texinfo_show_urls = 'footnote'
 
 # If true, do not generate a @detailmenu in the "Top" node's menu.
-#texinfo_no_detailmenu = False
+# texinfo_no_detailmenu = False
 
-primary_domain = 'cpp'
-highlight_language = 'cpp'
+primary_domain = "cpp"
+highlight_language = "cpp"
 
 
 def generate_doxygen_xml(app):
-    build_dir = os.path.join(app.confdir, '.build')
+    build_dir = os.path.join(app.confdir, ".build")
     if not os.path.exists(build_dir):
         os.mkdir(build_dir)
 
     try:
-        subprocess.call(['doxygen', '--version'])
-        retcode = subprocess.call(['doxygen'], cwd=app.confdir)
+        subprocess.call(["doxygen", "--version"])
+        retcode = subprocess.call(["doxygen"], cwd=app.confdir)
         if retcode < 0:
             sys.stderr.write("doxygen error code: {}\n".format(-retcode))
     except OSError as e:
         sys.stderr.write("doxygen execution failed: {}\n".format(e))
 
 
+def prepare(app):
+    with open(DIR.parent / "README.rst") as f:
+        contents = f.read()
+
+    # Filter out section titles for index.rst for LaTeX
+    if app.builder.name == "latex":
+        contents = re.sub(r"^(.*)\n[-~]{3,}$", r"**\1**", contents, flags=re.MULTILINE)
+
+    with open(DIR / "readme.rst", "w") as f:
+        f.write(contents)
+
+
+def clean_up(app, exception):
+    (DIR / "readme.rst").unlink()
+
+
 def setup(app):
-    """Add hook for building doxygen xml when needed"""
+
+    # Add hook for building doxygen xml when needed
     app.connect("builder-inited", generate_doxygen_xml)
+
+    # Copy the readme in
+    app.connect("builder-inited", prepare)
+
+    # Clean up the generated readme
+    app.connect("build-finished", clean_up)
diff --git a/docs/faq.rst b/docs/faq.rst
index 4d491fb..8bf05a4 100644
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -5,7 +5,7 @@
 ===========================================================
 
 1. Make sure that the name specified in PYBIND11_MODULE is identical to the
-filename of the extension library (without prefixes such as .so)
+filename of the extension library (without suffixes such as .so)
 
 2. If the above did not fix the issue, you are likely using an incompatible
 version of Python (for instance, the extension library was compiled against
@@ -27,18 +27,6 @@
 
 See the first answer.
 
-CMake doesn't detect the right Python version
-=============================================
-
-The CMake-based build system will try to automatically detect the installed
-version of Python and link against that. When this fails, or when there are
-multiple versions of Python and it finds the wrong one, delete
-``CMakeCache.txt`` and then invoke CMake as follows:
-
-.. code-block:: bash
-
-    cmake -DPYTHON_EXECUTABLE:FILEPATH=<path-to-python-executable> .
-
 .. _faq_reference_arguments:
 
 Limitations involving reference arguments
@@ -100,8 +88,8 @@
 
 .. code-block:: cpp
 
-    void init_ex1(py::module &);
-    void init_ex2(py::module &);
+    void init_ex1(py::module_ &);
+    void init_ex2(py::module_ &);
     /* ... */
 
     PYBIND11_MODULE(example, m) {
@@ -114,7 +102,7 @@
 
 .. code-block:: cpp
 
-    void init_ex1(py::module &m) {
+    void init_ex1(py::module_ &m) {
         m.def("add", [](int a, int b) { return a + b; });
     }
 
@@ -122,7 +110,7 @@
 
 .. code-block:: cpp
 
-    void init_ex2(py::module &m) {
+    void init_ex2(py::module_ &m) {
         m.def("sub", [](int a, int b) { return a - b; });
     }
 
@@ -275,17 +263,34 @@
         });
     }
 
+CMake doesn't detect the right Python version
+=============================================
+
+The CMake-based build system will try to automatically detect the installed
+version of Python and link against that. When this fails, or when there are
+multiple versions of Python and it finds the wrong one, delete
+``CMakeCache.txt`` and then add ``-DPYTHON_EXECUTABLE=$(which python)`` to your
+CMake configure line. (Replace ``$(which python)`` with a path to python if
+you prefer.)
+
+You can alternatively try ``-DPYBIND11_FINDPYTHON=ON``, which will activate the
+new CMake FindPython support instead of pybind11's custom search. This requires
+CMake 3.12+; 3.15+ or 3.18.2+ are even better. You can also set this in your
+``CMakeLists.txt`` before adding or finding pybind11.
+
 Inconsistent detection of Python version in CMake and pybind11
 ==============================================================
 
-The functions ``find_package(PythonInterp)`` and ``find_package(PythonLibs)`` provided by CMake
-for Python version detection are not used by pybind11 due to unreliability and limitations that make
-them unsuitable for pybind11's needs. Instead pybind provides its own, more reliable Python detection
-CMake code. Conflicts can arise, however, when using pybind11 in a project that *also* uses the CMake
-Python detection in a system with several Python versions installed.
+The functions ``find_package(PythonInterp)`` and ``find_package(PythonLibs)``
+provided by CMake for Python version detection are modified by pybind11 due to
+unreliability and limitations that make them unsuitable for pybind11's needs.
+Instead pybind11 provides its own, more reliable Python detection CMake code.
+Conflicts can arise, however, when using pybind11 in a project that *also* uses
+the CMake Python detection in a system with several Python versions installed.
 
-This difference may cause inconsistencies and errors if *both* mechanisms are used in the same project. Consider the following
-Cmake code executed in a system with Python 2.7 and 3.x installed:
+This difference may cause inconsistencies and errors if *both* mechanisms are
+used in the same project. Consider the following CMake code executed in a
+system with Python 2.7 and 3.x installed:
 
 .. code-block:: cmake
 
@@ -303,10 +308,24 @@
     find_package(PythonInterp)
     find_package(PythonLibs)
 
-will detect Python 3.x for pybind11 and may crash on ``find_package(PythonLibs)`` afterwards.
+will detect Python 3.x for pybind11 and may crash on
+``find_package(PythonLibs)`` afterwards.
 
-It is advised to avoid using ``find_package(PythonInterp)`` and ``find_package(PythonLibs)`` from CMake and rely
-on pybind11 in detecting Python version. If this is not possible CMake machinery should be called *before* including pybind11.
+There are three possible solutions:
+
+1. Avoid using ``find_package(PythonInterp)`` and ``find_package(PythonLibs)``
+   from CMake and rely on pybind11 to detect the Python version. If this is not
+   possible, the CMake machinery should be called *before* including pybind11.
+2. Set ``PYBIND11_FINDPYTHON`` to ``True`` or use ``find_package(Python
+   COMPONENTS Interpreter Development)`` on modern CMake (3.12+, 3.15+ better,
+   3.18.2+ best). Pybind11 in these cases uses the new CMake FindPython instead
+   of the old, deprecated search tools, and these modules are much better at
+   finding the correct Python.
+3. Set ``PYBIND11_NOPYTHON`` to ``TRUE``. Pybind11 will not search for Python.
+   However, you will have to use the target-based system, and do more setup
+   yourself, because it does not know about or include things that depend on
+   Python, like ``pybind11_add_module``. This might be ideal for integrating
+   into an existing system, like scikit-build's Python helpers.
 
 How to cite this project?
 =========================
diff --git a/docs/index.rst b/docs/index.rst
index d236611..4e2e8ca 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,18 +1,17 @@
-.. only: not latex
+.. only:: latex
 
-    .. image:: pybind11-logo.png
+   Intro
+   =====
 
-pybind11 --- Seamless operability between C++11 and Python
-==========================================================
+.. include:: readme.rst
 
-.. only: not latex
+.. only:: not latex
 
     Contents:
 
 .. toctree::
    :maxdepth: 1
 
-   intro
    changelog
    upgrade
 
@@ -20,6 +19,7 @@
    :caption: The Basics
    :maxdepth: 2
 
+   installing
    basics
    classes
    compiling
@@ -45,3 +45,4 @@
    benchmark
    limitations
    reference
+   cmake/index
diff --git a/docs/installing.rst b/docs/installing.rst
new file mode 100644
index 0000000..2597285
--- /dev/null
+++ b/docs/installing.rst
@@ -0,0 +1,103 @@
+.. _installing:
+
+Installing the library
+######################
+
+There are several ways to get the pybind11 source, which lives at
+`pybind/pybind11 on GitHub <https://github.com/pybind/pybind11>`_. The pybind11
+developers recommend one of the first three ways listed here, submodule, PyPI,
+or conda-forge, for obtaining pybind11.
+
+Include as a submodule
+======================
+
+When you are working on a project in Git, you can use the pybind11 repository
+as a submodule. From your git repository, use:
+
+.. code-block:: bash
+
+    git submodule add ../../pybind/pybind11 extern/pybind11 -b stable
+    git submodule update --init
+
+This assumes you are placing your dependencies in ``extern/``, and that you are
+using GitHub; if you are not using GitHub, use the full https or ssh URL
+instead of the relative URL ``../../pybind/pybind11`` above. Some other servers
+also require the ``.git`` extension (GitHub does not).
+
+From here, you can now include ``extern/pybind11/include``, or you can use
+the various integration tools (see :ref:`compiling`) pybind11 provides directly
+from the local folder.
+
+Include with PyPI
+=================
+
+You can download the sources and CMake files as a Python package from PyPI
+using Pip. Just use:
+
+.. code-block:: bash
+
+    pip install pybind11
+
+This will provide pybind11 in a standard Python package format. If you want
+pybind11 available directly in your environment root, you can use:
+
+.. code-block:: bash
+
+    pip install "pybind11[global]"
+
+This is not recommended if you are installing with your system Python, as it
+will add files to ``/usr/local/include/pybind11`` and
+``/usr/local/share/cmake/pybind11``, so unless that is what you want, it is
+recommended only for use in virtual environments or your ``pyproject.toml``
+file (see :ref:`compiling`).
+
+Include with conda-forge
+========================
+
+You can use pybind11 with conda packaging via `conda-forge
+<https://github.com/conda-forge/pybind11-feedstock>`_:
+
+.. code-block:: bash
+
+    conda install -c conda-forge pybind11
+
+
+Include with vcpkg
+==================
+You can download and install pybind11 using the Microsoft `vcpkg
+<https://github.com/Microsoft/vcpkg/>`_ dependency manager:
+
+.. code-block:: bash
+
+    git clone https://github.com/Microsoft/vcpkg.git
+    cd vcpkg
+    ./bootstrap-vcpkg.sh
+    ./vcpkg integrate install
+    vcpkg install pybind11
+
+The pybind11 port in vcpkg is kept up to date by Microsoft team members and
+community contributors. If the version is out of date, please `create an issue
+or pull request <https://github.com/Microsoft/vcpkg/>`_ on the vcpkg
+repository.
+
+Global install with brew
+========================
+
+The brew package manager (Homebrew on macOS, or Linuxbrew on Linux) has a
+`pybind11 package
+<https://github.com/Homebrew/homebrew-core/blob/master/Formula/pybind11.rb>`_.
+To install:
+
+.. code-block:: bash
+
+    brew install pybind11
+
+.. We should list Conan, and possibly a few other C++ package managers (hunter,
+.. perhaps). Conan has a very clean CMake integration that would be good to show.
+
+Other options
+=============
+
+Other locations you can find pybind11 are `listed here
+<https://repology.org/project/python:pybind11/versions>`_; these are maintained
+by various packagers and the community.
diff --git a/docs/intro.rst b/docs/intro.rst
deleted file mode 100644
index 10e1799..0000000
--- a/docs/intro.rst
+++ /dev/null
@@ -1,93 +0,0 @@
-.. image:: pybind11-logo.png
-
-About this project
-==================
-**pybind11** is a lightweight header-only library that exposes C++ types in Python
-and vice versa, mainly to create Python bindings of existing C++ code. Its
-goals and syntax are similar to the excellent `Boost.Python`_ library by David
-Abrahams: to minimize boilerplate code in traditional extension modules by
-inferring type information using compile-time introspection.
-
-.. _Boost.Python: http://www.boost.org/doc/libs/release/libs/python/doc/index.html
-
-The main issue with Boost.Python—and the reason for creating such a similar
-project—is Boost. Boost is an enormously large and complex suite of utility
-libraries that works with almost every C++ compiler in existence. This
-compatibility has its cost: arcane template tricks and workarounds are
-necessary to support the oldest and buggiest of compiler specimens. Now that
-C++11-compatible compilers are widely available, this heavy machinery has
-become an excessively large and unnecessary dependency.
-Think of this library as a tiny self-contained version of Boost.Python with
-everything stripped away that isn't relevant for binding generation. Without
-comments, the core header files only require ~4K lines of code and depend on
-Python (2.7 or 3.x, or PyPy2.7 >= 5.7) and the C++ standard library. This
-compact implementation was possible thanks to some of the new C++11 language
-features (specifically: tuples, lambda functions and variadic templates). Since
-its creation, this library has grown beyond Boost.Python in many ways, leading
-to dramatically simpler binding code in many common situations.
-
-Core features
-*************
-The following core C++ features can be mapped to Python
-
-- Functions accepting and returning custom data structures per value, reference, or pointer
-- Instance methods and static methods
-- Overloaded functions
-- Instance attributes and static attributes
-- Arbitrary exception types
-- Enumerations
-- Callbacks
-- Iterators and ranges
-- Custom operators
-- Single and multiple inheritance
-- STL data structures
-- Smart pointers with reference counting like ``std::shared_ptr``
-- Internal references with correct reference counting
-- C++ classes with virtual (and pure virtual) methods can be extended in Python
-
-Goodies
-*******
-In addition to the core functionality, pybind11 provides some extra goodies:
-
-- Python 2.7, 3.x, and PyPy (PyPy2.7 >= 5.7) are supported with an
-  implementation-agnostic interface.
-
-- It is possible to bind C++11 lambda functions with captured variables. The
-  lambda capture data is stored inside the resulting Python function object.
-
-- pybind11 uses C++11 move constructors and move assignment operators whenever
-  possible to efficiently transfer custom data types.
-
-- It's easy to expose the internal storage of custom data types through
-  Pythons' buffer protocols. This is handy e.g. for fast conversion between
-  C++ matrix classes like Eigen and NumPy without expensive copy operations.
-
-- pybind11 can automatically vectorize functions so that they are transparently
-  applied to all entries of one or more NumPy array arguments.
-
-- Python's slice-based access and assignment operations can be supported with
-  just a few lines of code.
-
-- Everything is contained in just a few header files; there is no need to link
-  against any additional libraries.
-
-- Binaries are generally smaller by a factor of at least 2 compared to
-  equivalent bindings generated by Boost.Python. A recent pybind11 conversion
-  of `PyRosetta`_, an enormous Boost.Python binding project, reported a binary
-  size reduction of **5.4x** and compile time reduction by **5.8x**.
-
-- Function signatures are precomputed at compile time (using ``constexpr``),
-  leading to smaller binaries.
-
-- With little extra effort, C++ types can be pickled and unpickled similar to
-  regular Python objects.
-
-.. _PyRosetta: http://graylab.jhu.edu/RosettaCon2016/PyRosetta-4.pdf
-
-Supported compilers
-*******************
-
-1. Clang/LLVM (any non-ancient version with C++11 support)
-2. GCC 4.8 or newer
-3. Microsoft Visual Studio 2015 or newer
-4. Intel C++ compiler v17 or newer (v16 with pybind11 v2.0 and v15 with pybind11 v2.0 and a `workaround <https://github.com/pybind/pybind11/issues/276>`_ )
diff --git a/docs/limitations.rst b/docs/limitations.rst
index a1a4f1a..be7300c 100644
--- a/docs/limitations.rst
+++ b/docs/limitations.rst
@@ -1,6 +1,9 @@
 Limitations
 ###########
 
+Design choices
+^^^^^^^^^^^^^^
+
 pybind11 strives to be a general solution to binding generation, but it also has
 certain limitations:
 
@@ -11,10 +14,59 @@
 
 - The NumPy interface ``pybind11::array`` greatly simplifies accessing
   numerical data from C++ (and vice versa), but it's not a full-blown array
-  class like ``Eigen::Array`` or ``boost.multi_array``.
+  class like ``Eigen::Array`` or ``boost.multi_array``. ``Eigen`` objects are
+  directly supported, however, with ``pybind11/eigen.h``.
 
-These features could be implemented but would lead to a significant increase in
-complexity. I've decided to draw the line here to keep this project simple and
-compact. Users who absolutely require these features are encouraged to fork
-pybind11.
+Large but useful features could be implemented in pybind11 but would lead to a
+significant increase in complexity. Pybind11 strives to be simple and compact.
+Users who require large new features are encouraged to write an extension to
+pybind11; see `pybind11_json <https://github.com/pybind/pybind11_json>`_ for an
+example.
 
+
+Known bugs
+^^^^^^^^^^
+
+These are issues that hopefully will one day be fixed, but currently are
+unsolved. If you know how to help with one of these issues, contributions
+are welcome!
+
+- Intel 20.2 is currently having an issue with the test suite.
+  `#2573 <https://github.com/pybind/pybind11/pull/2573>`_
+
+- Debug mode Python does not support 1-5 tests in the test suite currently.
+  `#2422 <https://github.com/pybind/pybind11/pull/2422>`_
+
+- PyPy3 7.3.1 and 7.3.2 have issues with several tests on 32-bit Windows.
+
+Known limitations
+^^^^^^^^^^^^^^^^^
+
+These are issues that are probably solvable, but have not been fixed yet. A
+clean, well written patch would likely be accepted to solve them.
+
+- Type casters are not kept alive recursively.
+  `#2527 <https://github.com/pybind/pybind11/issues/2527>`_
+  One consequence is that containers of ``char *`` are currently not supported.
+  `#2245 <https://github.com/pybind/pybind11/issues/2245>`_
+
+- The ``cpptest`` does not run on Windows with Python 3.8 or newer, due to DLL
+  loader changes. User code that is correctly installed should not be affected.
+  `#2560 <https://github.com/pybind/pybind11/issues/2560>`_
+
+Python 3.9.0 warning
+^^^^^^^^^^^^^^^^^^^^
+
+Combining older versions of pybind11 (< 2.6.0) with Python 3.9.0 will
+trigger undefined behavior that typically manifests as crashes during
+interpreter shutdown (but could also destroy your data. **You have been
+warned**).
+
+This issue has been
+`fixed in Python <https://github.com/python/cpython/pull/22670>`_.  As a
+mitigation until 3.9.1 is released and commonly used, pybind11 (2.6.0 or newer)
+includes a temporary workaround specifically when Python 3.9.0 is detected at
+runtime, leaking about 50 bytes of memory when a callback function is garbage
+collected. For reference, the pybind11 test suite has about 2,000 such
+callbacks, but only 49 are garbage collected before the end-of-process. Wheels
+built with Python 3.9.0 will correctly avoid the leak when run in Python 3.9.1.
diff --git a/docs/reference.rst b/docs/reference.rst
index a9fbe60..e3a61af 100644
--- a/docs/reference.rst
+++ b/docs/reference.rst
@@ -46,7 +46,7 @@
 Convenience classes for specific Python types
 =============================================
 
-.. doxygenclass:: module
+.. doxygenclass:: module_
     :members:
 
 .. doxygengroup:: pytypes
@@ -91,15 +91,15 @@
 
 See :doc:`/classes` and :doc:`/advanced/classes` for more detail.
 
-.. doxygendefine:: PYBIND11_OVERLOAD
+.. doxygendefine:: PYBIND11_OVERRIDE
 
-.. doxygendefine:: PYBIND11_OVERLOAD_PURE
+.. doxygendefine:: PYBIND11_OVERRIDE_PURE
 
-.. doxygendefine:: PYBIND11_OVERLOAD_NAME
+.. doxygendefine:: PYBIND11_OVERRIDE_NAME
 
-.. doxygendefine:: PYBIND11_OVERLOAD_PURE_NAME
+.. doxygendefine:: PYBIND11_OVERRIDE_PURE_NAME
 
-.. doxygenfunction:: get_overload
+.. doxygenfunction:: get_override
 
 Exceptions
 ==========
diff --git a/docs/release.rst b/docs/release.rst
index 9846f97..43f502a 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -1,21 +1,77 @@
-To release a new version of pybind11:
+On version numbers
+^^^^^^^^^^^^^^^^^^
 
-- Update the version number and push to pypi
-    - Update ``pybind11/_version.py`` (set release version, remove 'dev').
-    - Update ``PYBIND11_VERSION_MAJOR`` etc. in ``include/pybind11/detail/common.h``.
-    - Ensure that all the information in ``setup.py`` is up-to-date.
-    - Update version in ``docs/conf.py``.
-    - Tag release date in ``docs/changelog.rst``.
-    - ``git add`` and ``git commit``.
-    - if new minor version: ``git checkout -b vX.Y``, ``git push -u origin vX.Y``
-    - ``git tag -a vX.Y.Z -m 'vX.Y.Z release'``.
+The two version numbers (C++ and Python) must match when combined (checked when
+you build the PyPI package), and must be a valid `PEP 440
+<https://www.python.org/dev/peps/pep-0440>`_ version when combined.
+
+For example:
+
+.. code-block:: C++
+
+    #define PYBIND11_VERSION_MAJOR X
+    #define PYBIND11_VERSION_MINOR Y
+    #define PYBIND11_VERSION_PATCH Z.dev1
+
+For beta, ``PYBIND11_VERSION_PATCH`` should be ``Z.b1``. RCs can be ``Z.rc1``.
+Always include the dot (even though PEP 440 allows it to be dropped). For a
+final release, this must be a simple integer.
+
+
+To release a new version of pybind11:
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Update the version number
+  - Update ``PYBIND11_VERSION_MAJOR`` etc. in
+    ``include/pybind11/detail/common.h``. PATCH should be a simple integer.
+  - Update ``pybind11/_version.py`` (match above)
+  - Ensure that all the information in ``setup.py`` is up-to-date.
+  - Add release date in ``docs/changelog.rst``.
+  - ``git add`` and ``git commit``, ``git push``. **Ensure CI passes**. (If it
+    fails due to a known flake issue, either ignore or restart CI.)
+- Add a release branch if this is a new minor version
+  - ``git checkout -b vX.Y``, ``git push -u origin vX.Y``
+- Update tags (optional; if you skip this, the GitHub release makes a
+  non-annotated tag for you)
+  - ``git tag -a vX.Y.Z -m 'vX.Y.Z release'``.
+  - ``git push --tags``.
+- Update stable
+    - ``git checkout stable``
+    - ``git merge master``
     - ``git push``
-    - ``git push --tags``.
-    - ``python setup.py sdist upload``.
-    - ``python setup.py bdist_wheel upload``.
+- Make a GitHub release (this shows up in the UI, sends new release
+  notifications to users watching releases, and also uploads PyPI packages).
+  (Note: if you do not use an existing tag, this creates a new lightweight tag
+  for you, so you could skip the above step).
+  - GUI method: click "Create a new release" on the far right, fill in the tag
+    name (if you didn't tag above, it will be made here), fill in a release
+    name like "Version X.Y.Z", and optionally copy-and-paste the changelog into
+    the description (processed as markdown by Pandoc). Check "pre-release" if
+    this is a beta/RC.
+  - CLI method: with ``gh`` installed, run ``gh release create vX.Y.Z -t "Version X.Y.Z"``
+    If this is a pre-release, add ``-p``.
+
 - Get back to work
-    - Update ``_version.py`` (add 'dev' and increment minor).
-    - Update version in ``docs/conf.py``
-    - Update version macros in ``include/pybind11/common.h``
-    - ``git add`` and ``git commit``.
-      ``git push``
+  - Make sure you are on master, not somewhere else: ``git checkout master``
+  - Update version macros in ``include/pybind11/detail/common.h`` (set PATCH to
+    ``0.dev1`` and increment MINOR).
+  - Update ``_version.py`` to match
+  - Add a spot for in-development updates in ``docs/changelog.rst``.
+  - ``git add``, ``git commit``, ``git push``
+
+If a version branch is updated, remember to set PATCH to ``1.dev1``.
+
+
+Manual packaging
+^^^^^^^^^^^^^^^^
+
+If you need to manually upload releases, you can download the releases from the job artifacts and upload them with twine. You can also make the files locally (not recommended in general, as your local directory is more likely to be "dirty" and SDists love picking up random unrelated/hidden files); this is the procedure:
+
+.. code-block:: bash
+
+    python3 -m pip install build
+    python3 -m build
+    PYBIND11_SDIST_GLOBAL=1 python3 -m build
+    twine upload dist/*
+
+This makes SDists and wheels, and the final line uploads them.
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 3818fe8..35366e3 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1 +1,7 @@
-breathe == 4.5.0
+breathe==4.20.0
+commonmark==0.9.1
+recommonmark==0.6.0
+sphinx==3.2.1
+sphinx_rtd_theme==0.5.0
+sphinxcontrib-moderncmakedomain==3.13
+sphinxcontrib-svg2pdfconverter==1.1.0
diff --git a/docs/upgrade.rst b/docs/upgrade.rst
index 3f56973..87bcebe 100644
--- a/docs/upgrade.rst
+++ b/docs/upgrade.rst
@@ -8,6 +8,114 @@
 deprecated APIs and their replacements, build system changes, general code
 modernization and other useful information.
 
+.. _upgrade-guide-2.6:
+
+v2.6
+====
+
+Usage of the ``PYBIND11_OVERLOAD*`` macros and ``get_overload`` function should
+be replaced by ``PYBIND11_OVERRIDE*`` and ``get_override``. In the future, the
+old macros may be deprecated and removed.
+
+``py::module`` has been renamed ``py::module_``, but a backward compatible
+typedef has been included. This change was to avoid a language change in C++20
+that requires unqualified ``module`` not be placed at the start of a logical
+line. Qualified usage is unaffected and the typedef will remain unless the
+C++ language rules change again.
+
+The public constructors of ``py::module_`` have been deprecated. Use
+``PYBIND11_MODULE`` or ``module_::create_extension_module`` instead.
+
+An error is now thrown when ``__init__`` is forgotten on subclasses. This was
+incorrect before, but was not checked. Add a call to ``__init__`` if it is
+missing.
+
+A ``py::type_error`` is now thrown when casting to a subclass (like
+``py::bytes`` from ``py::object``) if the conversion is not valid. Make a valid
+conversion instead.
+
+The undocumented ``h.get_type()`` method has been deprecated and replaced by
+``py::type::of(h)``.
+
+Enums now have a ``__str__`` method pre-defined; if you want to override it,
+the simplest fix is to add the new ``py::prepend()`` tag when defining
+``"__str__"``.
+
+If ``__eq__`` is defined but not ``__hash__``, ``__hash__`` is now set to
+``None``, as in normal CPython. You should add ``__hash__`` if you intended the
+class to be hashable, possibly using the new ``py::hash`` shortcut.
+
+The constructors for ``py::array`` now always take signed integers for size,
+for consistency. This may lead to compiler warnings on some systems. Cast to
+``py::ssize_t`` instead of ``std::size_t``.
+
+The ``tools/clang`` submodule and ``tools/mkdoc.py`` have been moved to a
+standalone package, `pybind11-mkdoc`_. If you were using those tools, please
+use them via a pip install from the new location.
+
+The ``pybind11`` package on PyPI no longer fills the wheel "headers" slot - if
+you were using the headers from this slot, they are available by requesting the
+``global`` extra, that is, ``pip install "pybind11[global]"``. (Most users will
+be unaffected, as the ``pybind11/include`` location is reported by ``python -m
+pybind11 --includes`` and ``pybind11.get_include()`` is still correct and has
+not changed since 2.5).
+
+.. _pybind11-mkdoc: https://github.com/pybind/pybind11-mkdoc
+
+CMake support:
+--------------
+
+The minimum required version of CMake is now 3.4.  Several details of the CMake
+support have been deprecated; warnings will be shown if you need to change
+something. The changes are:
+
+* ``PYBIND11_CPP_STANDARD=<platform-flag>`` is deprecated, please use
+  ``CMAKE_CXX_STANDARD=<number>`` instead, or any other valid CMake CXX or CUDA
+  standard selection method, like ``target_compile_features``.
+
+* If you do not request a standard, pybind11 targets will compile with the
+  compiler default, but not less than C++11, instead of forcing C++14 always.
+  If you depend on the old behavior, please use ``set(CMAKE_CXX_STANDARD 14 CACHE STRING "")``
+  instead.
+
+* Direct ``pybind11::module`` usage should always be accompanied by at least
+  ``set(CMAKE_CXX_VISIBILITY_PRESET hidden)`` or similar - it used to try to
+  manually force this compiler flag (but not correctly on all compilers or with
+  CUDA).
+
+* ``pybind11_add_module``'s ``SYSTEM`` argument is deprecated and does nothing;
+  linking now behaves like other imported libraries consistently in both
+  config and submodule mode, and behaves like a ``SYSTEM`` library by
+  default.
+
+* If ``PYTHON_EXECUTABLE`` is not set, virtual environments (``venv``,
+  ``virtualenv``, and ``conda``) are prioritized over the standard search
+  (similar to the new FindPython mode).
+
+In addition, the following changes may be of interest:
+
+* ``CMAKE_INTERPROCEDURAL_OPTIMIZATION`` will be respected by
+  ``pybind11_add_module`` if set instead of linking to ``pybind11::lto`` or
+  ``pybind11::thin_lto``.
+
+* Using ``find_package(Python COMPONENTS Interpreter Development)`` before
+  pybind11 will cause pybind11 to use the new Python mechanisms instead of its
+  own custom search, based on a patched version of classic ``FindPythonInterp``
+  / ``FindPythonLibs``. In the future, this may become the default. A recent
+  (3.15+ or 3.18.2+) version of CMake is recommended.
+
+
+
+v2.5
+====
+
+The Python package now includes the headers as data in the package itself, as
+well as in the "headers" wheel slot. ``pybind11 --includes`` and
+``pybind11.get_include()`` report the new location, which is always correct
+regardless of how pybind11 was installed, making the old ``user=`` argument
+meaningless. If you are not using the function to get the location already, you
+are encouraged to switch to the package location.
+
 
 v2.2
 ====
diff --git a/include/pybind11/attr.h b/include/pybind11/attr.h
index 6962d6f..0c41670 100644
--- a/include/pybind11/attr.h
+++ b/include/pybind11/attr.h
@@ -12,7 +12,7 @@
 
 #include "cast.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 /// \addtogroup annotations
 /// @{
@@ -23,6 +23,9 @@
 /// Annotation for operators
 struct is_operator { };
 
+/// Annotation for classes that cannot be subclassed
+struct is_final { };
+
 /// Annotation for parent scope
 struct scope { handle value; scope(const handle &s) : value(s) { } };
 
@@ -37,8 +40,9 @@
 
 /// Annotation indicating that a class derives from another given type
 template <typename T> struct base {
+
     PYBIND11_DEPRECATED("base<T>() was deprecated in favor of specifying 'T' as a template argument to class_")
-    base() { }
+    base() { } // NOLINT(modernize-use-equals-default): breaks MSVC 2015 when adding an attribute
 };
 
 /// Keep patient alive while nurse lives
@@ -58,7 +62,7 @@
     handle value;
 
     PYBIND11_DEPRECATED("py::metaclass() is no longer required. It's turned on by default now.")
-    metaclass() {}
+    metaclass() { } // NOLINT(modernize-use-equals-default): breaks MSVC 2015 when adding an attribute
 
     /// Override pybind11's default metaclass
     explicit metaclass(handle value) : value(value) { }
@@ -70,6 +74,9 @@
 /// Annotation to mark enums as an arithmetic type
 struct arithmetic { };
 
+/// Mark a function for addition at the beginning of the existing overload chain instead of the end
+struct prepend { };
+
 /** \rst
     A call policy which places one or more guard variables (``Ts...``) around the function call.
 
@@ -110,7 +117,7 @@
 
 /// @} annotations
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 /* Forward declarations */
 enum op_id : int;
 enum op_type : int;
@@ -134,7 +141,8 @@
 struct function_record {
     function_record()
         : is_constructor(false), is_new_style_constructor(false), is_stateless(false),
-          is_operator(false), has_args(false), has_kwargs(false), is_method(false) { }
+          is_operator(false), is_method(false), has_args(false),
+          has_kwargs(false), has_kw_only_args(false), prepend(false) { }
 
     /// Function name
     char *name = nullptr; /* why no C++ strings? They generate heavier code.. */
@@ -172,18 +180,30 @@
     /// True if this is an operator (__add__), etc.
     bool is_operator : 1;
 
+    /// True if this is a method
+    bool is_method : 1;
+
     /// True if the function has a '*args' argument
     bool has_args : 1;
 
     /// True if the function has a '**kwargs' argument
     bool has_kwargs : 1;
 
-    /// True if this is a method
-    bool is_method : 1;
+    /// True once a 'py::kw_only' is encountered (any following args are keyword-only)
+    bool has_kw_only_args : 1;
+
+    /// True if this function is to be inserted at the beginning of the overload resolution chain
+    bool prepend : 1;
 
     /// Number of arguments (including py::args and/or py::kwargs, if present)
     std::uint16_t nargs;
 
+    /// Number of trailing arguments (counted in `nargs`) that are keyword-only
+    std::uint16_t nargs_kw_only = 0;
+
+    /// Number of leading arguments (counted in `nargs`) that are positional-only
+    std::uint16_t nargs_pos_only = 0;
+
     /// Python method object
     PyMethodDef *def = nullptr;
 
@@ -201,7 +221,7 @@
 struct type_record {
     PYBIND11_NOINLINE type_record()
         : multiple_inheritance(false), dynamic_attr(false), buffer_protocol(false),
-          default_holder(true), module_local(false) { }
+          default_holder(true), module_local(false), is_final(false) { }
 
     /// Handle to the parent scope
     handle scope;
@@ -254,6 +274,9 @@
     /// Is the class definition local to the module shared object?
     bool module_local : 1;
 
+    /// Is the class final, i.e. not inheritable from Python classes?
+    bool is_final : 1;
+
     PYBIND11_NOINLINE void add_base(const std::type_info &base, void *(*caster)(void *)) {
         auto base_info = detail::get_type_info(base, false);
         if (!base_info) {
@@ -353,12 +376,20 @@
     static void init(const is_new_style_constructor &, function_record *r) { r->is_new_style_constructor = true; }
 };
 
+inline void process_kw_only_arg(const arg &a, function_record *r) {
+    if (!a.name || strlen(a.name) == 0)
+        pybind11_fail("arg(): cannot specify an unnamed argument after an kw_only() annotation");
+    ++r->nargs_kw_only;
+}
+
 /// Process a keyword argument attribute (*without* a default value)
 template <> struct process_attribute<arg> : process_attribute_default<arg> {
     static void init(const arg &a, function_record *r) {
         if (r->is_method && r->args.empty())
             r->args.emplace_back("self", nullptr, handle(), true /*convert*/, false /*none not allowed*/);
         r->args.emplace_back(a.name, nullptr, handle(), !a.flag_noconvert, a.flag_none);
+
+        if (r->has_kw_only_args) process_kw_only_arg(a, r);
     }
 };
 
@@ -390,6 +421,22 @@
 #endif
         }
         r->args.emplace_back(a.name, a.descr, a.value.inc_ref(), !a.flag_noconvert, a.flag_none);
+
+        if (r->has_kw_only_args) process_kw_only_arg(a, r);
+    }
+};
+
+/// Process a keyword-only-arguments-follow pseudo argument
+template <> struct process_attribute<kw_only> : process_attribute_default<kw_only> {
+    static void init(const kw_only &, function_record *r) {
+        r->has_kw_only_args = true;
+    }
+};
+
+/// Process a positional-only-argument marker
+template <> struct process_attribute<pos_only> : process_attribute_default<pos_only> {
+    static void init(const pos_only &, function_record *r) {
+        r->nargs_pos_only = static_cast<std::uint16_t>(r->args.size());
     }
 };
 
@@ -417,6 +464,11 @@
 };
 
 template <>
+struct process_attribute<is_final> : process_attribute_default<is_final> {
+    static void init(const is_final &, type_record *r) { r->is_final = true; }
+};
+
+template <>
 struct process_attribute<buffer_protocol> : process_attribute_default<buffer_protocol> {
     static void init(const buffer_protocol &, type_record *r) { r->buffer_protocol = true; }
 };
@@ -431,6 +483,12 @@
     static void init(const module_local &l, type_record *r) { r->module_local = l.value; }
 };
 
+/// Process a 'prepend' attribute, putting this at the beginning of the overload chain
+template <>
+struct process_attribute<prepend> : process_attribute_default<prepend> {
+    static void init(const prepend &, function_record *r) { r->prepend = true; }
+};
+
 /// Process an 'arithmetic' attribute for enums (does nothing here)
 template <>
 struct process_attribute<arithmetic> : process_attribute_default<arithmetic> {};
@@ -489,5 +547,5 @@
     return named == 0 || (self + named + has_args + has_kwargs) == nargs;
 }
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/buffer_info.h b/include/pybind11/buffer_info.h
index 1f4115a..d803004 100644
--- a/include/pybind11/buffer_info.h
+++ b/include/pybind11/buffer_info.h
@@ -11,7 +11,30 @@
 
 #include "detail/common.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+// Default, C-style strides
+inline std::vector<ssize_t> c_strides(const std::vector<ssize_t> &shape, ssize_t itemsize) {
+    auto ndim = shape.size();
+    std::vector<ssize_t> strides(ndim, itemsize);
+    if (ndim > 0)
+        for (size_t i = ndim - 1; i > 0; --i)
+            strides[i - 1] = strides[i] * shape[i];
+    return strides;
+}
+
+// F-style strides; default when constructing an array_t with `ExtraFlags & f_style`
+inline std::vector<ssize_t> f_strides(const std::vector<ssize_t> &shape, ssize_t itemsize) {
+    auto ndim = shape.size();
+    std::vector<ssize_t> strides(ndim, itemsize);
+    for (size_t i = 1; i < ndim; ++i)
+        strides[i] = strides[i - 1] * shape[i - 1];
+    return strides;
+}
+
+PYBIND11_NAMESPACE_END(detail)
 
 /// Information record describing a Python buffer object
 struct buffer_info {
@@ -24,7 +47,7 @@
     std::vector<ssize_t> strides; // Number of bytes between adjacent entries (for each per dimension)
     bool readonly = false;        // flag to indicate if the underlying storage may be written to
 
-    buffer_info() { }
+    buffer_info() = default;
 
     buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
                 detail::any_container<ssize_t> shape_in, detail::any_container<ssize_t> strides_in, bool readonly=false)
@@ -53,8 +76,15 @@
 
     explicit buffer_info(Py_buffer *view, bool ownview = true)
     : buffer_info(view->buf, view->itemsize, view->format, view->ndim,
-            {view->shape, view->shape + view->ndim}, {view->strides, view->strides + view->ndim}, view->readonly) {
-        this->view = view;
+            {view->shape, view->shape + view->ndim},
+            /* Though buffer::request() requests PyBUF_STRIDES, ctypes objects
+             * ignore this flag and return a view with NULL strides.
+             * When strides are NULL, build them manually.  */
+            view->strides
+            ? std::vector<ssize_t>(view->strides, view->strides + view->ndim)
+            : detail::c_strides({view->shape, view->shape + view->ndim}, view->itemsize),
+            view->readonly) {
+        this->m_view = view;
         this->ownview = ownview;
     }
 
@@ -73,16 +103,18 @@
         ndim = rhs.ndim;
         shape = std::move(rhs.shape);
         strides = std::move(rhs.strides);
-        std::swap(view, rhs.view);
+        std::swap(m_view, rhs.m_view);
         std::swap(ownview, rhs.ownview);
         readonly = rhs.readonly;
         return *this;
     }
 
     ~buffer_info() {
-        if (view && ownview) { PyBuffer_Release(view); delete view; }
+        if (m_view && ownview) { PyBuffer_Release(m_view); delete m_view; }
     }
 
+    Py_buffer *view() const { return m_view; }
+    Py_buffer *&view() { return m_view; }
 private:
     struct private_ctr_tag { };
 
@@ -90,11 +122,11 @@
                 detail::any_container<ssize_t> &&shape_in, detail::any_container<ssize_t> &&strides_in, bool readonly)
     : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly) { }
 
-    Py_buffer *view = nullptr;
+    Py_buffer *m_view = nullptr;
     bool ownview = false;
 };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 template <typename T, typename SFINAE = void> struct compare_buffer_info {
     static bool compare(const buffer_info& b) {
@@ -110,5 +142,5 @@
     }
 };
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
index a0b4d1b..11c61a4 100644
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -36,8 +36,8 @@
 #  define PYBIND11_HAS_U8STRING
 #endif
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /// A life support system for temporary objects created by `type_caster::load()`.
 /// Adding a patient will keep it alive up until the enclosing function returns.
@@ -59,7 +59,7 @@
         Py_CLEAR(ptr);
 
         // A heuristic to reduce the stack's capacity (e.g. after long recursive calls)
-        if (stack.capacity() > 16 && stack.size() != 0 && stack.capacity() / stack.size() > 2)
+        if (stack.capacity() > 16 && !stack.empty() && stack.capacity() / stack.size() > 2)
             stack.shrink_to_fit();
     }
 
@@ -163,7 +163,7 @@
  */
 PYBIND11_NOINLINE inline detail::type_info* get_type_info(PyTypeObject *type) {
     auto &bases = all_type_info(type);
-    if (bases.size() == 0)
+    if (bases.empty())
         return nullptr;
     if (bases.size() > 1)
         pybind11_fail("pybind11::detail::get_type_info: type has multiple pybind11-registered bases");
@@ -220,7 +220,7 @@
     {}
 
     // Default constructor (used to signal a value-and-holder not found by get_value_and_holder())
-    value_and_holder() {}
+    value_and_holder() = default;
 
     // Used for past-the-end iterator
     value_and_holder(size_t index) : index{index} {}
@@ -288,8 +288,8 @@
         // Past-the-end iterator:
         iterator(size_t end) : curr(end) {}
     public:
-        bool operator==(const iterator &other) { return curr.index == other.curr.index; }
-        bool operator!=(const iterator &other) { return curr.index != other.curr.index; }
+        bool operator==(const iterator &other) const { return curr.index == other.curr.index; }
+        bool operator!=(const iterator &other) const { return curr.index != other.curr.index; }
         iterator &operator++() {
             if (!inst->simple_layout)
                 curr.vh += 1 + (*types)[curr.index]->holder_size_in_ptrs;
@@ -342,8 +342,8 @@
             "(compile in debug mode for type details)");
 #else
     pybind11_fail("pybind11::detail::instance::get_value_and_holder: `" +
-            std::string(find_type->type->tp_name) + "' is not a pybind11 base of the given `" +
-            std::string(Py_TYPE(this)->tp_name) + "' instance");
+            get_fully_qualified_tp_name(find_type->type) + "' is not a pybind11 base of the given `" +
+            get_fully_qualified_tp_name(Py_TYPE(this)) + "' instance");
 #endif
 }
 
@@ -432,7 +432,7 @@
 
 #if !defined(PYPY_VERSION)
     if (scope.trace) {
-        PyTracebackObject *trace = (PyTracebackObject *) scope.trace;
+        auto *trace = (PyTracebackObject *) scope.trace;
 
         /* Get the deepest trace possible */
         while (trace->tb_next)
@@ -458,7 +458,7 @@
     auto &instances = get_internals().registered_instances;
     auto range = instances.equal_range(ptr);
     for (auto it = range.first; it != range.second; ++it) {
-        for (auto vh : values_and_holders(it->second)) {
+        for (const auto &vh : values_and_holders(it->second)) {
             if (vh.type == type)
                 return handle((PyObject *) it->second);
         }
@@ -636,7 +636,7 @@
     /// native typeinfo, or when the native one wasn't able to produce a value.
     PYBIND11_NOINLINE bool try_load_foreign_module_local(handle src) {
         constexpr auto *local_key = PYBIND11_MODULE_LOCAL_ID;
-        const auto pytype = src.get_type();
+        const auto pytype = type::handle_of(src);
         if (!hasattr(pytype, local_key))
             return false;
 
@@ -816,7 +816,7 @@
 template <typename T1, typename T2> struct is_copy_assignable<std::pair<T1, T2>>
     : all_of<is_copy_assignable<T1>, is_copy_assignable<T2>> {};
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 // polymorphic_type_hook<itype>::get(src, tinfo) determines whether the object pointed
 // to by `src` actually is an instance of some class derived from `itype`.
@@ -835,21 +835,27 @@
 // You may specialize polymorphic_type_hook yourself for types that want to appear
 // polymorphic to Python but do not use C++ RTTI. (This is a not uncommon pattern
 // in performance-sensitive applications, used most notably in LLVM.)
+//
+// polymorphic_type_hook_base allows users to specialize polymorphic_type_hook with
+// std::enable_if. User provided specializations will always have higher priority than
+// the default implementation and specialization provided in polymorphic_type_hook_base.
 template <typename itype, typename SFINAE = void>
-struct polymorphic_type_hook
+struct polymorphic_type_hook_base
 {
     static const void *get(const itype *src, const std::type_info*&) { return src; }
 };
 template <typename itype>
-struct polymorphic_type_hook<itype, detail::enable_if_t<std::is_polymorphic<itype>::value>>
+struct polymorphic_type_hook_base<itype, detail::enable_if_t<std::is_polymorphic<itype>::value>>
 {
     static const void *get(const itype *src, const std::type_info*& type) {
         type = src ? &typeid(*src) : nullptr;
         return dynamic_cast<const void*>(src);
     }
 };
+template <typename itype, typename SFINAE = void>
+struct polymorphic_type_hook : public polymorphic_type_hook_base<itype> {};
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /// Generic type caster for objects stored on the heap
 template <typename type> class type_caster_base : public type_caster_generic {
@@ -1000,6 +1006,7 @@
     std::is_same<CharT, wchar_t> /* std::wstring */
 >;
 
+
 template <typename T>
 struct type_caster<T, enable_if_t<std::is_arithmetic<T>::value && !is_std_char_type<T>::value>> {
     using _py_type_0 = conditional_t<sizeof(T) <= sizeof(long), long, long long>;
@@ -1028,12 +1035,12 @@
                 : (py_type) PYBIND11_LONG_AS_LONGLONG(src.ptr());
         }
 
+        // Python API reported an error
         bool py_err = py_value == (py_type) -1 && PyErr_Occurred();
 
-        // Protect std::numeric_limits::min/max with parentheses
-        if (py_err || (std::is_integral<T>::value && sizeof(py_type) != sizeof(T) &&
-                       (py_value < (py_type) (std::numeric_limits<T>::min)() ||
-                        py_value > (py_type) (std::numeric_limits<T>::max)()))) {
+        // Check to see if the conversion is valid (integers should match exactly)
+        // Signed/unsigned checks happen elsewhere
+        if (py_err || (std::is_integral<T>::value && sizeof(py_type) != sizeof(T) && py_value != (py_type) (T) py_value)) {
             bool type_error = py_err && PyErr_ExceptionMatches(
 #if PY_VERSION_HEX < 0x03000000 && !defined(PYPY_VERSION)
                 PyExc_SystemError
@@ -1123,7 +1130,7 @@
         }
 
         /* Check if this is a C++ type */
-        auto &bases = all_type_info((PyTypeObject *) h.get_type().ptr());
+        auto &bases = all_type_info((PyTypeObject *) type::handle_of(h).ptr());
         if (bases.size() == 1) { // Only allowing loading from a single-value type
             value = values_and_holders(reinterpret_cast<instance *>(h.ptr())).begin()->value_ptr();
             return true;
@@ -1233,11 +1240,11 @@
 #endif
         }
 
-        object utfNbytes = reinterpret_steal<object>(PyUnicode_AsEncodedString(
+        auto utfNbytes = reinterpret_steal<object>(PyUnicode_AsEncodedString(
             load_src.ptr(), UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr));
         if (!utfNbytes) { PyErr_Clear(); return false; }
 
-        const CharT *buffer = reinterpret_cast<const CharT *>(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr()));
+        const auto *buffer = reinterpret_cast<const CharT *>(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr()));
         size_t length = (size_t) PYBIND11_BYTES_SIZE(utfNbytes.ptr()) / sizeof(CharT);
         if (UTF_N > 8) { buffer++; length--; } // Skip BOM for UTF-16/32
         value = StringType(buffer, length);
@@ -1251,7 +1258,7 @@
 
     static handle cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) {
         const char *buffer = reinterpret_cast<const char *>(src.data());
-        ssize_t nbytes = ssize_t(src.size() * sizeof(CharT));
+        auto nbytes = ssize_t(src.size() * sizeof(CharT));
         handle s = decode_utfN(buffer, nbytes);
         if (!s) throw error_already_set();
         return s;
@@ -1267,10 +1274,8 @@
             UTF_N == 16 ? PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr) :
                           PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr);
 #else
-        // PyPy seems to have multiple problems related to PyUnicode_UTF*: the UTF8 version
-        // sometimes segfaults for unknown reasons, while the UTF16 and 32 versions require a
-        // non-const char * arguments, which is also a nuisance, so bypass the whole thing by just
-        // passing the encoding as a string value, which works properly:
+        // PyPy segfaults on PyUnicode_DecodeUTF16 (and possibly on PyUnicode_DecodeUTF32 as well),
+        // so bypass the whole thing by just passing the encoding as a string value, which works properly:
         return PyUnicode_Decode(buffer, nbytes, UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr);
 #endif
     }
@@ -1357,7 +1362,7 @@
         // errors.  We also allow want to allow unicode characters U+0080 through U+00FF, as those
         // can fit into a single char value.
         if (StringCaster::UTF_N == 8 && str_len > 1 && str_len <= 4) {
-            unsigned char v0 = static_cast<unsigned char>(value[0]);
+            auto v0 = static_cast<unsigned char>(value[0]);
             size_t char0_bytes = !(v0 & 0x80) ? 1 : // low bits only: 0-127
                 (v0 & 0xE0) == 0xC0 ? 2 : // 0b110xxxxx - start of 2-byte sequence
                 (v0 & 0xF0) == 0xE0 ? 3 : // 0b1110xxxx - start of 3-byte sequence
@@ -1415,6 +1420,17 @@
         return cast_impl(std::forward<T>(src), policy, parent, indices{});
     }
 
+    // copied from the PYBIND11_TYPE_CASTER macro
+    template <typename T>
+    static handle cast(T *src, return_value_policy policy, handle parent) {
+        if (!src) return none().release();
+        if (policy == return_value_policy::take_ownership) {
+            auto h = cast(std::move(*src), policy, parent); delete src; return h;
+        } else {
+            return cast(*src, policy, parent);
+        }
+    }
+
     static constexpr auto name = _("Tuple[") + concat(make_caster<Ts>::name...) + _("]");
 
     template <typename T> using cast_op_type = type;
@@ -1492,16 +1508,11 @@
     }
 
     explicit operator type*() { return this->value; }
-    explicit operator type&() { return *(this->value); }
+    // static_cast works around compiler error with MSVC 17 and CUDA 10.2
+    // see issue #2180
+    explicit operator type&() { return *(static_cast<type *>(this->value)); }
     explicit operator holder_type*() { return std::addressof(holder); }
-
-    // Workaround for Intel compiler bug
-    // see pybind11 issue 94
-    #if defined(__ICC) || defined(__INTEL_COMPILER)
-    operator holder_type&() { return holder; }
-    #else
     explicit operator holder_type&() { return holder; }
-    #endif
 
     static handle cast(const holder_type &src, return_value_policy, handle) {
         const auto *ptr = holder_helper<holder_type>::get(src);
@@ -1598,6 +1609,10 @@
 
 template <typename T> struct handle_type_name { static constexpr auto name = _<T>(); };
 template <> struct handle_type_name<bytes> { static constexpr auto name = _(PYBIND11_BYTES_NAME); };
+template <> struct handle_type_name<int_> { static constexpr auto name = _("int"); };
+template <> struct handle_type_name<iterable> { static constexpr auto name = _("Iterable"); };
+template <> struct handle_type_name<iterator> { static constexpr auto name = _("Iterator"); };
+template <> struct handle_type_name<none> { static constexpr auto name = _("None"); };
 template <> struct handle_type_name<args> { static constexpr auto name = _("*args"); };
 template <> struct handle_type_name<kwargs> { static constexpr auto name = _("**kwargs"); };
 
@@ -1684,7 +1699,7 @@
         throw cast_error("Unable to cast Python instance to C++ type (compile in debug mode for details)");
 #else
         throw cast_error("Unable to cast Python instance of type " +
-            (std::string) str(handle.get_type()) + " to C++ type '" + type_id<T>() + "'");
+            (std::string) str(type::handle_of(handle)) + " to C++ type '" + type_id<T>() + "'");
 #endif
     }
     return conv;
@@ -1696,7 +1711,7 @@
     return conv;
 }
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 // pytype -> C++ type
 template <typename T, detail::enable_if_t<!detail::is_pyobject<T>::value, int> = 0>
@@ -1713,13 +1728,16 @@
 
 // C++ type -> py::object
 template <typename T, detail::enable_if_t<!detail::is_pyobject<T>::value, int> = 0>
-object cast(const T &value, return_value_policy policy = return_value_policy::automatic_reference,
+object cast(T &&value, return_value_policy policy = return_value_policy::automatic_reference,
             handle parent = handle()) {
+    using no_ref_T = typename std::remove_reference<T>::type;
     if (policy == return_value_policy::automatic)
-        policy = std::is_pointer<T>::value ? return_value_policy::take_ownership : return_value_policy::copy;
+        policy = std::is_pointer<no_ref_T>::value ? return_value_policy::take_ownership :
+                 std::is_lvalue_reference<T>::value ? return_value_policy::copy : return_value_policy::move;
     else if (policy == return_value_policy::automatic_reference)
-        policy = std::is_pointer<T>::value ? return_value_policy::reference : return_value_policy::copy;
-    return reinterpret_steal<object>(detail::make_caster<T>::cast(value, policy, parent));
+        policy = std::is_pointer<no_ref_T>::value ? return_value_policy::reference :
+                 std::is_lvalue_reference<T>::value ? return_value_policy::copy : return_value_policy::move;
+    return reinterpret_steal<object>(detail::make_caster<T>::cast(std::forward<T>(value), policy, parent));
 }
 
 template <typename T> T handle::cast() const { return pybind11::cast<T>(*this); }
@@ -1732,7 +1750,7 @@
         throw cast_error("Unable to cast Python instance to C++ rvalue: instance has multiple references"
             " (compile in debug mode for details)");
 #else
-        throw cast_error("Unable to move from Python " + (std::string) str(obj.get_type()) +
+        throw cast_error("Unable to move from Python " + (std::string) str(type::handle_of(obj)) +
                 " instance to C++ " + type_id<T>() + " instance: instance has multiple references");
 #endif
 
@@ -1741,7 +1759,7 @@
     return ret;
 }
 
-// Calling cast() on an rvalue calls pybind::cast with the object rvalue, which does:
+// Calling cast() on an rvalue calls pybind11::cast with the object rvalue, which does:
 // - If we have to move (because T has no copy constructor), do it.  This will fail if the moved
 //   object has multiple references, but trying to copy will fail to compile.
 // - If both movable and copyable, check ref count: if 1, move; otherwise copy
@@ -1764,22 +1782,22 @@
 template <> inline void object::cast() const & { return; }
 template <> inline void object::cast() && { return; }
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 // Declared in pytypes.h:
 template <typename T, enable_if_t<!is_pyobject<T>::value, int>>
 object object_or_cast(T &&o) { return pybind11::cast(std::forward<T>(o)); }
 
-struct overload_unused {}; // Placeholder type for the unneeded (and dead code) static variable in the OVERLOAD_INT macro
-template <typename ret_type> using overload_caster_t = conditional_t<
-    cast_is_temporary_value_reference<ret_type>::value, make_caster<ret_type>, overload_unused>;
+struct override_unused {}; // Placeholder type for the unneeded (and dead code) static variable in the PYBIND11_OVERRIDE_IMPL macro
+template <typename ret_type> using override_caster_t = conditional_t<
+    cast_is_temporary_value_reference<ret_type>::value, make_caster<ret_type>, override_unused>;
 
 // Trampoline use: for reference/pointer types to value-converted values, we do a value cast, then
 // store the result in the given variable.  For other types, this is a no-op.
 template <typename T> enable_if_t<cast_is_temporary_value_reference<T>::value, T> cast_ref(object &&o, make_caster<T> &caster) {
     return cast_op<T>(load_type(caster, o));
 }
-template <typename T> enable_if_t<!cast_is_temporary_value_reference<T>::value, T> cast_ref(object &&, overload_unused &) {
+template <typename T> enable_if_t<!cast_is_temporary_value_reference<T>::value, T> cast_ref(object &&, override_unused &) {
     pybind11_fail("Internal error: cast_ref fallback invoked"); }
 
 // Trampoline use: Having a pybind11::cast with an invalid reference type is going to static_assert, even
@@ -1791,7 +1809,7 @@
     pybind11_fail("Internal error: cast_safe fallback invoked"); }
 template <> inline void cast_safe<void>(object &&) {}
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 template <return_value_policy policy = return_value_policy::automatic_reference>
 tuple make_tuple() { return tuple(0); }
@@ -1881,6 +1899,16 @@
 #endif
 };
 
+/// \ingroup annotations
+/// Annotation indicating that all following arguments are keyword-only; this is the equivalent of an
+/// unnamed '*' argument (in Python 3)
+struct kw_only {};
+
+/// \ingroup annotations
+/// Annotation indicating that all previous arguments are positional-only; this is the equivalent of an
+/// unnamed '/' argument (in Python 3.8)
+struct pos_only {};
+
 template <typename T>
 arg_v arg::operator=(T &&value) const { return {std::move(*this), std::forward<T>(value)}; }
 
@@ -1892,9 +1920,9 @@
     String literal version of `arg`
  \endrst */
 constexpr arg operator"" _a(const char *name, size_t) { return arg(name); }
-}
+} // namespace literals
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 // forward declaration (definition in attr.h)
 struct function_record;
@@ -2055,7 +2083,7 @@
     }
 
     void process(list &args_list, detail::args_proxy ap) {
-        for (const auto &a : ap)
+        for (auto a : ap)
             args_list.append(a);
     }
 
@@ -2087,7 +2115,7 @@
     void process(list &/*args_list*/, detail::kwargs_proxy kp) {
         if (!kp)
             return;
-        for (const auto &k : reinterpret_borrow<dict>(kp)) {
+        for (auto k : reinterpret_borrow<dict>(kp)) {
             if (m_kwargs.contains(k.first)) {
 #if defined(NDEBUG)
                 multiple_values_error();
@@ -2165,7 +2193,19 @@
     return operator()<policy>(std::forward<Args>(args)...);
 }
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
+
+
+template<typename T>
+handle type::handle_of() {
+   static_assert(
+      std::is_base_of<detail::type_caster_generic, detail::make_caster<T>>::value,
+      "py::type::of<T> only supports the case where T is a registered C++ types."
+    );
+
+    return detail::get_type_handle(typeid(T), true);
+}
+
 
 #define PYBIND11_MAKE_OPAQUE(...) \
     namespace pybind11 { namespace detail { \
@@ -2173,7 +2213,7 @@
     }}
 
 /// Lets you pass a type containing a `,` through a macro parameter without needing a separate
-/// typedef, e.g.: `PYBIND11_OVERLOAD(PYBIND11_TYPE(ReturnType<A, B>), PYBIND11_TYPE(Parent<C, D>), f, arg)`
+/// typedef, e.g.: `PYBIND11_OVERRIDE(PYBIND11_TYPE(ReturnType<A, B>), PYBIND11_TYPE(Parent<C, D>), f, arg)`
 #define PYBIND11_TYPE(...) __VA_ARGS__
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/chrono.h b/include/pybind11/chrono.h
index ea777e6..c368110 100644
--- a/include/pybind11/chrono.h
+++ b/include/pybind11/chrono.h
@@ -27,15 +27,15 @@
 #define PyDateTime_DELTA_GET_MICROSECONDS(o) (((PyDateTime_Delta*)o)->microseconds)
 #endif
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 template <typename type> class duration_caster {
 public:
-    typedef typename type::rep rep;
-    typedef typename type::period period;
+    using rep = typename type::rep;
+    using period = typename type::period;
 
-    typedef std::chrono::duration<uint_fast32_t, std::ratio<86400>> days;
+    using days = std::chrono::duration<uint_fast32_t, std::ratio<86400>>;
 
     bool load(handle src, bool) {
         using namespace std::chrono;
@@ -98,7 +98,7 @@
 // This is for casting times on the system clock into datetime.datetime instances
 template <typename Duration> class type_caster<std::chrono::time_point<std::chrono::system_clock, Duration>> {
 public:
-    typedef std::chrono::time_point<std::chrono::system_clock, Duration> type;
+    using type = std::chrono::time_point<std::chrono::system_clock, Duration>;
     bool load(handle src, bool) {
         using namespace std::chrono;
 
@@ -140,7 +140,7 @@
         }
         else return false;
 
-        value = system_clock::from_time_t(std::mktime(&cal)) + msecs;
+        value = time_point_cast<Duration>(system_clock::from_time_t(std::mktime(&cal)) + msecs);
         return true;
     }
 
@@ -150,21 +150,28 @@
         // Lazy initialise the PyDateTime import
         if (!PyDateTimeAPI) { PyDateTime_IMPORT; }
 
-        std::time_t tt = system_clock::to_time_t(time_point_cast<system_clock::duration>(src));
+        // Extract the sub-second microseconds and make sure they are non-negative, to avoid a bug in
+        // eastern hemisphere time zones (see https://github.com/pybind/pybind11/issues/2417)
+        using us_t = duration<int, std::micro>;
+        auto us = duration_cast<us_t>(src.time_since_epoch() % seconds(1));
+        if (us.count() < 0)
+            us += seconds(1);
+
+        // Subtract microseconds BEFORE `system_clock::to_time_t`, because:
+        // > If std::time_t has lower precision, it is implementation-defined whether the value is rounded or truncated.
+        // (https://en.cppreference.com/w/cpp/chrono/system_clock/to_time_t)
+        std::time_t tt = system_clock::to_time_t(time_point_cast<system_clock::duration>(src - us));
         // this function uses static memory so it's best to copy it out asap just in case
         // otherwise other code that is using localtime may break this (not just python code)
         std::tm localtime = *std::localtime(&tt);
 
-        // Declare these special duration types so the conversions happen with the correct primitive types (int)
-        using us_t = duration<int, std::micro>;
-
         return PyDateTime_FromDateAndTime(localtime.tm_year + 1900,
                                           localtime.tm_mon + 1,
                                           localtime.tm_mday,
                                           localtime.tm_hour,
                                           localtime.tm_min,
                                           localtime.tm_sec,
-                                          (duration_cast<us_t>(src.time_since_epoch() % seconds(1))).count());
+                                          us.count());
     }
     PYBIND11_TYPE_CASTER(type, _("datetime.datetime"));
 };
@@ -180,5 +187,5 @@
 : public duration_caster<std::chrono::duration<Rep, Period>> {
 };
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/complex.h b/include/pybind11/complex.h
index 3f89638..f8327eb 100644
--- a/include/pybind11/complex.h
+++ b/include/pybind11/complex.h
@@ -17,7 +17,7 @@
 #  undef I
 #endif
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 template <typename T> struct format_descriptor<std::complex<T>, detail::enable_if_t<std::is_floating_point<T>::value>> {
     static constexpr const char c = format_descriptor<T>::c;
@@ -32,7 +32,7 @@
 
 #endif
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 template <typename T> struct is_fmt_numeric<std::complex<T>, detail::enable_if_t<std::is_floating_point<T>::value>> {
     static constexpr bool value = true;
@@ -61,5 +61,5 @@
 
     PYBIND11_TYPE_CASTER(std::complex<T>, _("complex"));
 };
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/detail/class.h b/include/pybind11/detail/class.h
index edfa7de..65dad5a 100644
--- a/include/pybind11/detail/class.h
+++ b/include/pybind11/detail/class.h
@@ -12,10 +12,10 @@
 #include "../attr.h"
 #include "../options.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
-#if PY_VERSION_HEX >= 0x03030000
+#if PY_VERSION_HEX >= 0x03030000 && !defined(PYPY_VERSION)
 #  define PYBIND11_BUILTIN_QUALNAME
 #  define PYBIND11_SET_OLDPY_QUALNAME(obj, nameobj)
 #else
@@ -24,6 +24,18 @@
 #  define PYBIND11_SET_OLDPY_QUALNAME(obj, nameobj) setattr((PyObject *) obj, "__qualname__", nameobj)
 #endif
 
+inline std::string get_fully_qualified_tp_name(PyTypeObject *type) {
+#if !defined(PYPY_VERSION)
+    return type->tp_name;
+#else
+    auto module_name = handle((PyObject *) type).attr("__module__").cast<std::string>();
+    if (module_name == PYBIND11_BUILTINS_MODULE)
+        return type->tp_name;
+    else
+        return std::move(module_name) + "." + type->tp_name;
+#endif
+}
+
 inline PyTypeObject *type_incref(PyTypeObject *type) {
     Py_INCREF(type);
     return type;
@@ -117,7 +129,7 @@
     //   2. `Type.static_prop = other_static_prop` --> setattro:  replace existing `static_prop`
     //   3. `Type.regular_attribute = value`       --> setattro:  regular attribute assignment
     const auto static_prop = (PyObject *) get_internals().static_property_type;
-    const auto call_descr_set = descr && PyObject_IsInstance(descr, static_prop)
+    const auto call_descr_set = descr && value && PyObject_IsInstance(descr, static_prop)
                                 && !PyObject_IsInstance(value, static_prop);
     if (call_descr_set) {
         // Call `static_property.__set__()` instead of replacing the `static_property`.
@@ -156,6 +168,69 @@
 }
 #endif
 
+/// metaclass `__call__` function that is used to create all pybind11 objects.
+extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, PyObject *kwargs) {
+
+    // use the default metaclass call to create/initialize the object
+    PyObject *self = PyType_Type.tp_call(type, args, kwargs);
+    if (self == nullptr) {
+        return nullptr;
+    }
+
+    // This must be a pybind11 instance
+    auto instance = reinterpret_cast<detail::instance *>(self);
+
+    // Ensure that the base __init__ function(s) were called
+    for (const auto &vh : values_and_holders(instance)) {
+        if (!vh.holder_constructed()) {
+            PyErr_Format(PyExc_TypeError, "%.200s.__init__() must be called when overriding __init__",
+                         get_fully_qualified_tp_name(vh.type->type).c_str());
+            Py_DECREF(self);
+            return nullptr;
+        }
+    }
+
+    return self;
+}
+
+/// Cleanup the type-info for a pybind11-registered type.
+extern "C" inline void pybind11_meta_dealloc(PyObject *obj) {
+    auto *type = (PyTypeObject *) obj;
+    auto &internals = get_internals();
+
+    // A pybind11-registered type will:
+    // 1) be found in internals.registered_types_py
+    // 2) have exactly one associated `detail::type_info`
+    auto found_type = internals.registered_types_py.find(type);
+    if (found_type != internals.registered_types_py.end() &&
+        found_type->second.size() == 1 &&
+        found_type->second[0]->type == type) {
+
+        auto *tinfo = found_type->second[0];
+        auto tindex = std::type_index(*tinfo->cpptype);
+        internals.direct_conversions.erase(tindex);
+
+        if (tinfo->module_local)
+            registered_local_types_cpp().erase(tindex);
+        else
+            internals.registered_types_cpp.erase(tindex);
+        internals.registered_types_py.erase(tinfo->type);
+
+        // Actually just `std::erase_if`, but that's only available in C++20
+        auto &cache = internals.inactive_override_cache;
+        for (auto it = cache.begin(), last = cache.end(); it != last; ) {
+            if (it->first == (PyObject *) tinfo->type)
+                it = cache.erase(it);
+            else
+                ++it;
+        }
+
+        delete tinfo;
+    }
+
+    PyType_Type.tp_dealloc(obj);
+}
+
 /** This metaclass is assigned by default to all pybind11 types and is required in order
     for static properties to function correctly. Users may override this using `py::metaclass`.
     Return value: New reference. */
@@ -181,11 +256,15 @@
     type->tp_base = type_incref(&PyType_Type);
     type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE;
 
+    type->tp_call = pybind11_meta_call;
+
     type->tp_setattro = pybind11_meta_setattro;
 #if PY_MAJOR_VERSION >= 3
     type->tp_getattro = pybind11_meta_getattro;
 #endif
 
+    type->tp_dealloc = pybind11_meta_dealloc;
+
     if (PyType_Ready(type) < 0)
         pybind11_fail("make_default_metaclass(): failure in PyType_Ready()!");
 
@@ -223,7 +302,7 @@
     auto &registered_instances = get_internals().registered_instances;
     auto range = registered_instances.equal_range(ptr);
     for (auto it = range.first; it != range.second; ++it) {
-        if (Py_TYPE(self) == Py_TYPE(it->second)) {
+        if (self == it->second) {
             registered_instances.erase(it);
             return true;
         }
@@ -277,12 +356,7 @@
 /// following default function will be used which simply throws an exception.
 extern "C" inline int pybind11_object_init(PyObject *self, PyObject *, PyObject *) {
     PyTypeObject *type = Py_TYPE(self);
-    std::string msg;
-#if defined(PYPY_VERSION)
-    msg += handle((PyObject *) type).attr("__module__").cast<std::string>() + ".";
-#endif
-    msg += type->tp_name;
-    msg += ": No constructor defined!";
+    std::string msg = get_fully_qualified_tp_name(type) + ": No constructor defined!";
     PyErr_SetString(PyExc_TypeError, msg.c_str());
     return -1;
 }
@@ -421,7 +495,7 @@
 extern "C" inline int pybind11_set_dict(PyObject *self, PyObject *new_dict, void *) {
     if (!PyDict_Check(new_dict)) {
         PyErr_Format(PyExc_TypeError, "__dict__ must be set to a dictionary, not a '%.200s'",
-                     Py_TYPE(new_dict)->tp_name);
+                     get_fully_qualified_tp_name(Py_TYPE(new_dict)).c_str());
         return -1;
     }
     PyObject *&dict = *_PyObject_GetDictPtr(self);
@@ -448,11 +522,6 @@
 /// Give instances of this type a `__dict__` and opt into garbage collection.
 inline void enable_dynamic_attributes(PyHeapTypeObject *heap_type) {
     auto type = &heap_type->ht_type;
-#if defined(PYPY_VERSION)
-    pybind11_fail(std::string(type->tp_name) + ": dynamic attributes are "
-                                               "currently not supported in "
-                                               "conjunction with PyPy!");
-#endif
     type->tp_flags |= Py_TPFLAGS_HAVE_GC;
     type->tp_dictoffset = type->tp_basicsize; // place dict at the end
     type->tp_basicsize += (ssize_t)sizeof(PyObject *); // and allocate enough space for it
@@ -540,17 +609,17 @@
 #endif
     }
 
-    object module;
+    object module_;
     if (rec.scope) {
         if (hasattr(rec.scope, "__module__"))
-            module = rec.scope.attr("__module__");
+            module_ = rec.scope.attr("__module__");
         else if (hasattr(rec.scope, "__name__"))
-            module = rec.scope.attr("__name__");
+            module_ = rec.scope.attr("__name__");
     }
 
     auto full_name = c_str(
 #if !defined(PYPY_VERSION)
-        module ? str(module).cast<std::string>() + "." + rec.name :
+        module_ ? str(module_).cast<std::string>() + "." + rec.name :
 #endif
         rec.name);
 
@@ -565,7 +634,7 @@
 
     auto &internals = get_internals();
     auto bases = tuple(rec.bases);
-    auto base = (bases.size() == 0) ? internals.instance_base
+    auto base = (bases.empty()) ? internals.instance_base
                                     : bases[0].ptr();
 
     /* Danger zone: from now (and until PyType_Ready), make sure to
@@ -589,7 +658,7 @@
     type->tp_doc = tp_doc;
     type->tp_base = type_incref((PyTypeObject *)base);
     type->tp_basicsize = static_cast<ssize_t>(sizeof(instance));
-    if (bases.size() > 0)
+    if (!bases.empty())
         type->tp_bases = bases.release().ptr();
 
     /* Don't inherit base __init__ */
@@ -604,10 +673,12 @@
 #endif
 
     /* Flags */
-    type->tp_flags |= Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE;
+    type->tp_flags |= Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE;
 #if PY_MAJOR_VERSION < 3
     type->tp_flags |= Py_TPFLAGS_CHECKTYPES;
 #endif
+    if (!rec.is_final)
+        type->tp_flags |= Py_TPFLAGS_BASETYPE;
 
     if (rec.dynamic_attr)
         enable_dynamic_attributes(heap_type);
@@ -627,13 +698,13 @@
     else
         Py_INCREF(type); // Keep it alive forever (reference leak)
 
-    if (module) // Needed by pydoc
-        setattr((PyObject *) type, "__module__", module);
+    if (module_) // Needed by pydoc
+        setattr((PyObject *) type, "__module__", module_);
 
     PYBIND11_SET_OLDPY_QUALNAME(type, qualname);
 
     return (PyObject *) type;
 }
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h
index e53f502..751141a 100644
--- a/include/pybind11/detail/common.h
+++ b/include/pybind11/detail/common.h
@@ -9,12 +9,12 @@
 
 #pragma once
 
-#if !defined(NAMESPACE_BEGIN)
-#  define NAMESPACE_BEGIN(name) namespace name {
-#endif
-#if !defined(NAMESPACE_END)
-#  define NAMESPACE_END(name) }
-#endif
+#define PYBIND11_VERSION_MAJOR 2
+#define PYBIND11_VERSION_MINOR 6
+#define PYBIND11_VERSION_PATCH 1
+
+#define PYBIND11_NAMESPACE_BEGIN(name) namespace name {
+#define PYBIND11_NAMESPACE_END(name) }
 
 // Robust support for some features and loading modules compiled against different pybind versions
 // requires forcing hidden visibility on pybind code, so we enforce this by setting the attribute on
@@ -47,8 +47,8 @@
 
 // Compiler version assertions
 #if defined(__INTEL_COMPILER)
-#  if __INTEL_COMPILER < 1700
-#    error pybind11 requires Intel C++ compiler v17 or newer
+#  if __INTEL_COMPILER < 1800
+#    error pybind11 requires Intel C++ compiler v18 or newer
 #  endif
 #elif defined(__clang__) && !defined(__apple_build_version__)
 #  if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 3)
@@ -92,9 +92,19 @@
 #  define PYBIND11_DEPRECATED(reason) __attribute__((deprecated(reason)))
 #endif
 
-#define PYBIND11_VERSION_MAJOR 2
-#define PYBIND11_VERSION_MINOR 5
-#define PYBIND11_VERSION_PATCH 0
+#if defined(PYBIND11_CPP17)
+#  define PYBIND11_MAYBE_UNUSED [[maybe_unused]]
+#elif defined(_MSC_VER) && !defined(__clang__)
+#  define PYBIND11_MAYBE_UNUSED
+#else
+#  define PYBIND11_MAYBE_UNUSED __attribute__ ((__unused__))
+#endif
+
+/* Don't let Python.h #define (v)snprintf as macros because they are implemented
+   properly in Visual Studio since 2015. */
+#if defined(_MSC_VER) && _MSC_VER >= 1900
+#  define HAVE_SNPRINTF 1
+#endif
 
 /// Include Python header, disable linking to pythonX_d.lib on Windows in debug mode
 #if defined(_MSC_VER)
@@ -144,6 +154,7 @@
 #include <vector>
 #include <string>
 #include <stdexcept>
+#include <exception>
 #include <unordered_set>
 #include <unordered_map>
 #include <memory>
@@ -171,9 +182,11 @@
 #define PYBIND11_STR_TYPE ::pybind11::str
 #define PYBIND11_BOOL_ATTR "__bool__"
 #define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_bool)
-// Providing a separate declaration to make Clang's -Wmissing-prototypes happy
+#define PYBIND11_BUILTINS_MODULE "builtins"
+// Providing a separate declaration to make Clang's -Wmissing-prototypes happy.
+// See comment for PYBIND11_MODULE below for why this is marked "maybe unused".
 #define PYBIND11_PLUGIN_IMPL(name) \
-    extern "C" PYBIND11_EXPORT PyObject *PyInit_##name();   \
+    extern "C" PYBIND11_MAYBE_UNUSED PYBIND11_EXPORT PyObject *PyInit_##name(); \
     extern "C" PYBIND11_EXPORT PyObject *PyInit_##name()
 
 #else
@@ -197,13 +210,15 @@
 #define PYBIND11_STR_TYPE ::pybind11::bytes
 #define PYBIND11_BOOL_ATTR "__nonzero__"
 #define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_nonzero)
-// Providing a separate PyInit decl to make Clang's -Wmissing-prototypes happy
+#define PYBIND11_BUILTINS_MODULE "__builtin__"
+// Providing a separate PyInit decl to make Clang's -Wmissing-prototypes happy.
+// See comment for PYBIND11_MODULE below for why this is marked "maybe unused".
 #define PYBIND11_PLUGIN_IMPL(name) \
-    static PyObject *pybind11_init_wrapper();               \
-    extern "C" PYBIND11_EXPORT void init##name();           \
-    extern "C" PYBIND11_EXPORT void init##name() {          \
-        (void)pybind11_init_wrapper();                      \
-    }                                                       \
+    static PyObject *pybind11_init_wrapper();                           \
+    extern "C" PYBIND11_MAYBE_UNUSED PYBIND11_EXPORT void init##name(); \
+    extern "C" PYBIND11_EXPORT void init##name() {                      \
+        (void)pybind11_init_wrapper();                                  \
+    }                                                                   \
     PyObject *pybind11_init_wrapper()
 #endif
 
@@ -250,13 +265,13 @@
     ***Deprecated in favor of PYBIND11_MODULE***
 
     This macro creates the entry point that will be invoked when the Python interpreter
-    imports a plugin library. Please create a `module` in the function body and return
+    imports a plugin library. Please create a `module_` in the function body and return
     the pointer to its underlying Python object at the end.
 
     .. code-block:: cpp
 
         PYBIND11_PLUGIN(example) {
-            pybind11::module m("example", "pybind11 example plugin");
+            pybind11::module_ m("example", "pybind11 example plugin");
             /// Set up bindings here
             return m.ptr();
         }
@@ -277,7 +292,11 @@
     This macro creates the entry point that will be invoked when the Python interpreter
     imports an extension module. The module name is given as the fist argument and it
     should not be in quotes. The second macro argument defines a variable of type
-    `py::module` which can be used to initialize the module.
+    `py::module_` which can be used to initialize the module.
+
+    The entry point is marked as "maybe unused" to aid dead-code detection analysis:
+    since the entry point is typically only looked up at runtime and not referenced
+    during translation, it would otherwise appear as unused ("dead") code.
 
     .. code-block:: cpp
 
@@ -291,20 +310,25 @@
         }
 \endrst */
 #define PYBIND11_MODULE(name, variable)                                        \
-    static void PYBIND11_CONCAT(pybind11_init_, name)(pybind11::module &);     \
+    static ::pybind11::module_::module_def                                     \
+        PYBIND11_CONCAT(pybind11_module_def_, name) PYBIND11_MAYBE_UNUSED;     \
+    PYBIND11_MAYBE_UNUSED                                                      \
+    static void PYBIND11_CONCAT(pybind11_init_, name)(::pybind11::module_ &);  \
     PYBIND11_PLUGIN_IMPL(name) {                                               \
         PYBIND11_CHECK_PYTHON_VERSION                                          \
         PYBIND11_ENSURE_INTERNALS_READY                                        \
-        auto m = pybind11::module(PYBIND11_TOSTRING(name));                    \
+        auto m = ::pybind11::module_::create_extension_module(                 \
+            PYBIND11_TOSTRING(name), nullptr,                                  \
+            &PYBIND11_CONCAT(pybind11_module_def_, name));                     \
         try {                                                                  \
             PYBIND11_CONCAT(pybind11_init_, name)(m);                          \
             return m.ptr();                                                    \
         } PYBIND11_CATCH_INIT_EXCEPTIONS                                       \
     }                                                                          \
-    void PYBIND11_CONCAT(pybind11_init_, name)(pybind11::module &variable)
+    void PYBIND11_CONCAT(pybind11_init_, name)(::pybind11::module_ &variable)
 
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 using ssize_t = Py_ssize_t;
 using size_t  = std::size_t;
@@ -361,7 +385,7 @@
     reference_internal
 };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 inline static constexpr int log2(size_t n, int k = 0) { return (n <= 1) ? k : log2(n >> 1, k + 1); }
 
@@ -470,7 +494,7 @@
 #else
 template<size_t ...> struct index_sequence  { };
 template<size_t N, size_t ...S> struct make_index_sequence_impl : make_index_sequence_impl <N - 1, N - 1, S...> { };
-template<size_t ...S> struct make_index_sequence_impl <0, S...> { typedef index_sequence<S...> type; };
+template<size_t ...S> struct make_index_sequence_impl <0, S...> { using type = index_sequence<S...>; };
 template<size_t N> using make_index_sequence = typename make_index_sequence_impl<N>::type;
 #endif
 
@@ -484,8 +508,16 @@
 template <bool B> using bool_constant = std::integral_constant<bool, B>;
 template <typename T> struct negation : bool_constant<!T::value> { };
 
+// PGI/Intel cannot detect operator delete with the "compatible" void_t impl, so
+// use the direct alias form for them (it relies on a C++14 defect fix, so it
+// generally works on newer compilers, even if not in C++17 mode).
+#if defined(__PGIC__) || defined(__INTEL_COMPILER)
+template<typename... > using void_t = void;
+#else
 template <typename...> struct void_t_impl { using type = void; };
 template <typename... Ts> using void_t = typename void_t_impl<Ts...>::type;
+#endif
+
 
 /// Compile-time all/any/none of that check the boolean value of all template types
 #if defined(__cpp_fold_expressions) && !(defined(_MSC_VER) && (_MSC_VER < 1916))
@@ -511,17 +543,17 @@
 
 /// Strip the class from a method type
 template <typename T> struct remove_class { };
-template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...)> { typedef R type(A...); };
-template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...) const> { typedef R type(A...); };
+template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...)> { using type = R (A...); };
+template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...) const> { using type = R (A...); };
 
 /// Helper template to strip away type modifiers
-template <typename T> struct intrinsic_type                       { typedef T type; };
-template <typename T> struct intrinsic_type<const T>              { typedef typename intrinsic_type<T>::type type; };
-template <typename T> struct intrinsic_type<T*>                   { typedef typename intrinsic_type<T>::type type; };
-template <typename T> struct intrinsic_type<T&>                   { typedef typename intrinsic_type<T>::type type; };
-template <typename T> struct intrinsic_type<T&&>                  { typedef typename intrinsic_type<T>::type type; };
-template <typename T, size_t N> struct intrinsic_type<const T[N]> { typedef typename intrinsic_type<T>::type type; };
-template <typename T, size_t N> struct intrinsic_type<T[N]>       { typedef typename intrinsic_type<T>::type type; };
+template <typename T> struct intrinsic_type                       { using type = T; };
+template <typename T> struct intrinsic_type<const T>              { using type = typename intrinsic_type<T>::type; };
+template <typename T> struct intrinsic_type<T*>                   { using type = typename intrinsic_type<T>::type; };
+template <typename T> struct intrinsic_type<T&>                   { using type = typename intrinsic_type<T>::type; };
+template <typename T> struct intrinsic_type<T&&>                  { using type = typename intrinsic_type<T>::type; };
+template <typename T, size_t N> struct intrinsic_type<const T[N]> { using type = typename intrinsic_type<T>::type; };
+template <typename T, size_t N> struct intrinsic_type<T[N]>       { using type = typename intrinsic_type<T>::type; };
 template <typename T> using intrinsic_t = typename intrinsic_type<T>::type;
 
 /// Helper type to replace 'void' in some expressions
@@ -539,7 +571,7 @@
 constexpr size_t constexpr_sum(T n, Ts... ns) { return size_t{n} + constexpr_sum(ns...); }
 #endif
 
-NAMESPACE_BEGIN(constexpr_impl)
+PYBIND11_NAMESPACE_BEGIN(constexpr_impl)
 /// Implementation details for constexpr functions
 constexpr int first(int i) { return i; }
 template <typename T, typename... Ts>
@@ -548,7 +580,7 @@
 constexpr int last(int /*i*/, int result) { return result; }
 template <typename T, typename... Ts>
 constexpr int last(int i, int result, T v, Ts... vs) { return last(i + 1, v ? i : result, vs...); }
-NAMESPACE_END(constexpr_impl)
+PYBIND11_NAMESPACE_END(constexpr_impl)
 
 /// Return the index of the first type in Ts which satisfies Predicate<T>.  Returns sizeof...(Ts) if
 /// none match.
@@ -592,8 +624,9 @@
 
 /// Like is_base_of, but also requires that the base type is accessible (i.e. that a Derived pointer
 /// can be converted to a Base pointer)
+/// For unions, `is_base_of<T, T>::value` is False, so we need to check `is_same` as well.
 template <typename Base, typename Derived> using is_accessible_base_of = bool_constant<
-    std::is_base_of<Base, Derived>::value && std::is_convertible<Derived *, Base *>::value>;
+    (std::is_same<Base, Derived>::value || std::is_base_of<Base, Derived>::value) && std::is_convertible<Derived *, Base *>::value>;
 
 template <template<typename...> class Base>
 struct is_template_base_of_impl {
@@ -659,10 +692,10 @@
 #define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) (((PATTERN), void()), ...)
 #else
 using expand_side_effects = bool[];
-#define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) pybind11::detail::expand_side_effects{ ((PATTERN), void(), false)..., false }
+#define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) (void)pybind11::detail::expand_side_effects{ ((PATTERN), void(), false)..., false }
 #endif
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /// C++ bindings of builtin Python exceptions
 class builtin_exception : public std::runtime_error {
@@ -694,7 +727,7 @@
 
 template <typename T, typename SFINAE = void> struct format_descriptor { };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 // Returns the index of the given type in the type char array below, and in the list in numpy.h
 // The order here is: bool; 8 ints ((signed,unsigned)x(8,16,32,64)bits); float,double,long double;
 // complex float,double,long double.  Note that the long double types only participate when long
@@ -707,7 +740,7 @@
         std::is_integral<T>::value ? detail::log2(sizeof(T))*2 + std::is_unsigned<T>::value : 8 + (
         std::is_same<T, double>::value ? 1 : std::is_same<T, long double>::value ? 2 : 0));
 };
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 template <typename T> struct format_descriptor<T, detail::enable_if_t<std::is_arithmetic<T>::value>> {
     static constexpr const char c = "?bBhHiIqQfdg"[detail::is_fmt_numeric<T>::index];
@@ -732,10 +765,10 @@
 /// Dummy destructor wrapper that can be used to expose classes with a private destructor
 struct nodelete { template <typename T> void operator()(T*) { } };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 template <typename... Args>
 struct overload_cast_impl {
-    constexpr overload_cast_impl() {} // MSVC 2015 needs this
+    constexpr overload_cast_impl() {}; // NOLINT(modernize-use-equals-default):  MSVC 2015 needs this
 
     template <typename Return>
     constexpr auto operator()(Return (*pf)(Args...)) const noexcept
@@ -749,7 +782,7 @@
     constexpr auto operator()(Return (Class::*pmf)(Args...) const, std::true_type) const noexcept
                               -> decltype(pmf) { return pmf; }
 };
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 // overload_cast requires variable templates: C++14
 #if defined(PYBIND11_CPP14)
@@ -774,7 +807,7 @@
 };
 #endif // overload_cast
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 // Adaptor for converting arbitrary container arguments into a vector; implicitly convertible from
 // any standard container (or C-style array) supporting std::begin/std::end, any singleton
@@ -813,8 +846,8 @@
     const std::vector<T> *operator->() const { return &v; }
 };
 
-NAMESPACE_END(detail)
+// Forward-declaration; see detail/class.h
+std::string get_fully_qualified_tp_name(PyTypeObject*);
 
-
-
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/detail/descr.h b/include/pybind11/detail/descr.h
index 8d404e5..92720cd 100644
--- a/include/pybind11/detail/descr.h
+++ b/include/pybind11/detail/descr.h
@@ -11,8 +11,8 @@
 
 #include "common.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 #if !defined(_MSC_VER)
 #  define PYBIND11_DESCR_CONSTEXPR static constexpr
@@ -96,5 +96,5 @@
     return _("{") + descr + _("}");
 }
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/detail/init.h b/include/pybind11/detail/init.h
index acfe00b..3ef78c1 100644
--- a/include/pybind11/detail/init.h
+++ b/include/pybind11/detail/init.h
@@ -11,8 +11,8 @@
 
 #include "class.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 template <>
 class type_caster<value_and_holder> {
@@ -30,7 +30,7 @@
     value_and_holder *value = nullptr;
 };
 
-NAMESPACE_BEGIN(initimpl)
+PYBIND11_NAMESPACE_BEGIN(initimpl)
 
 inline void no_nullptr(void *ptr) {
     if (!ptr) throw type_error("pybind11::init(): factory function returned nullptr");
@@ -132,6 +132,7 @@
 template <typename Class>
 void construct(value_and_holder &v_h, Holder<Class> holder, bool need_alias) {
     auto *ptr = holder_helper<Holder<Class>>::get(holder);
+    no_nullptr(ptr);
     // If we need an alias, check that the held pointer is actually an alias instance
     if (Class::has_alias && need_alias && !is_alias<Class>(ptr))
         throw type_error("pybind11::init(): construction failed: returned holder-wrapped instance "
@@ -330,6 +331,6 @@
     }
 };
 
-NAMESPACE_END(initimpl)
-NAMESPACE_END(detail)
-NAMESPACE_END(pybind11)
+PYBIND11_NAMESPACE_END(initimpl)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(pybind11)
diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h
index 6224dfb..a455715 100644
--- a/include/pybind11/detail/internals.h
+++ b/include/pybind11/detail/internals.h
@@ -11,8 +11,8 @@
 
 #include "../pytypes.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 // Forward declarations
 inline PyTypeObject *make_static_property_type();
 inline PyTypeObject *make_default_metaclass();
@@ -82,10 +82,10 @@
 template <typename value_type>
 using type_map = std::unordered_map<std::type_index, value_type, type_hash, type_equal_to>;
 
-struct overload_hash {
+struct override_hash {
     inline size_t operator()(const std::pair<const PyObject *, const char *>& v) const {
         size_t value = std::hash<const void *>()(v.first);
-        value ^= std::hash<const void *>()(v.second)  + 0x9e3779b9 + (value<<6) + (value>>2);
+        value ^= std::hash<const void *>()(v.second) + 0x9e3779b9 + (value<<6) + (value>>2);
         return value;
     }
 };
@@ -97,7 +97,7 @@
     type_map<type_info *> registered_types_cpp; // std::type_index -> pybind11's type information
     std::unordered_map<PyTypeObject *, std::vector<type_info *>> registered_types_py; // PyTypeObject* -> base type_info(s)
     std::unordered_multimap<const void *, instance*> registered_instances; // void * -> instance*
-    std::unordered_set<std::pair<const PyObject *, const char *>, overload_hash> inactive_overload_cache;
+    std::unordered_set<std::pair<const PyObject *, const char *>, override_hash> inactive_override_cache;
     type_map<std::vector<bool (*)(PyObject *, void *&)>> direct_conversions;
     std::unordered_map<const PyObject *, std::vector<PyObject *>> patients;
     std::forward_list<void (*) (std::exception_ptr)> registered_exception_translators;
@@ -154,49 +154,60 @@
 
 /// On MSVC, debug and release builds are not ABI-compatible!
 #if defined(_MSC_VER) && defined(_DEBUG)
-#   define PYBIND11_BUILD_TYPE "_debug"
+#  define PYBIND11_BUILD_TYPE "_debug"
 #else
-#   define PYBIND11_BUILD_TYPE ""
+#  define PYBIND11_BUILD_TYPE ""
 #endif
 
 /// Let's assume that different compilers are ABI-incompatible.
-#if defined(_MSC_VER)
-#   define PYBIND11_COMPILER_TYPE "_msvc"
-#elif defined(__INTEL_COMPILER)
-#   define PYBIND11_COMPILER_TYPE "_icc"
-#elif defined(__clang__)
-#   define PYBIND11_COMPILER_TYPE "_clang"
-#elif defined(__PGI)
-#   define PYBIND11_COMPILER_TYPE "_pgi"
-#elif defined(__MINGW32__)
-#   define PYBIND11_COMPILER_TYPE "_mingw"
-#elif defined(__CYGWIN__)
-#   define PYBIND11_COMPILER_TYPE "_gcc_cygwin"
-#elif defined(__GNUC__)
-#   define PYBIND11_COMPILER_TYPE "_gcc"
-#else
-#   define PYBIND11_COMPILER_TYPE "_unknown"
+/// A user can manually set this string if they know their
+/// compiler is compatible.
+#ifndef PYBIND11_COMPILER_TYPE
+#  if defined(_MSC_VER)
+#    define PYBIND11_COMPILER_TYPE "_msvc"
+#  elif defined(__INTEL_COMPILER)
+#    define PYBIND11_COMPILER_TYPE "_icc"
+#  elif defined(__clang__)
+#    define PYBIND11_COMPILER_TYPE "_clang"
+#  elif defined(__PGI)
+#    define PYBIND11_COMPILER_TYPE "_pgi"
+#  elif defined(__MINGW32__)
+#    define PYBIND11_COMPILER_TYPE "_mingw"
+#  elif defined(__CYGWIN__)
+#    define PYBIND11_COMPILER_TYPE "_gcc_cygwin"
+#  elif defined(__GNUC__)
+#    define PYBIND11_COMPILER_TYPE "_gcc"
+#  else
+#    define PYBIND11_COMPILER_TYPE "_unknown"
+#  endif
 #endif
 
-#if defined(_LIBCPP_VERSION)
-#  define PYBIND11_STDLIB "_libcpp"
-#elif defined(__GLIBCXX__) || defined(__GLIBCPP__)
-#  define PYBIND11_STDLIB "_libstdcpp"
-#else
-#  define PYBIND11_STDLIB ""
+/// Likewise, assume that different standard libraries are ABI-incompatible.
+#ifndef PYBIND11_STDLIB
+#  if defined(_LIBCPP_VERSION)
+#    define PYBIND11_STDLIB "_libcpp"
+#  elif defined(__GLIBCXX__) || defined(__GLIBCPP__)
+#    define PYBIND11_STDLIB "_libstdcpp"
+#  else
+#    define PYBIND11_STDLIB ""
+#  endif
 #endif
 
 /// On Linux/OSX, changes in __GXX_ABI_VERSION__ indicate ABI incompatibility.
-#if defined(__GXX_ABI_VERSION)
-#  define PYBIND11_BUILD_ABI "_cxxabi" PYBIND11_TOSTRING(__GXX_ABI_VERSION)
-#else
-#  define PYBIND11_BUILD_ABI ""
+#ifndef PYBIND11_BUILD_ABI
+#  if defined(__GXX_ABI_VERSION)
+#    define PYBIND11_BUILD_ABI "_cxxabi" PYBIND11_TOSTRING(__GXX_ABI_VERSION)
+#  else
+#    define PYBIND11_BUILD_ABI ""
+#  endif
 #endif
 
-#if defined(WITH_THREAD)
-#  define PYBIND11_INTERNALS_KIND ""
-#else
-#  define PYBIND11_INTERNALS_KIND "_without_thread"
+#ifndef PYBIND11_INTERNALS_KIND
+#  if defined(WITH_THREAD)
+#    define PYBIND11_INTERNALS_KIND ""
+#  else
+#    define PYBIND11_INTERNALS_KIND "_without_thread"
+#  endif
 #endif
 
 #define PYBIND11_INTERNALS_ID "__pybind11_internals_v" \
@@ -273,7 +284,10 @@
         auto *&internals_ptr = *internals_pp;
         internals_ptr = new internals();
 #if defined(WITH_THREAD)
-        PyEval_InitThreads();
+
+        #if PY_VERSION_HEX < 0x03090000
+                PyEval_InitThreads();
+        #endif
         PyThreadState *tstate = PyThreadState_Get();
         #if PY_VERSION_HEX >= 0x03070000
             internals_ptr->tstate = PyThread_tss_alloc();
@@ -314,7 +328,7 @@
     return strings.front().c_str();
 }
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /// Returns a named pointer that is shared among all extension modules (using the same
 /// pybind11 version) running in the current interpreter. Names starting with underscores
@@ -346,4 +360,4 @@
     return *ptr;
 }
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/detail/typeid.h b/include/pybind11/detail/typeid.h
index 9c8a4fc..148889f 100644
--- a/include/pybind11/detail/typeid.h
+++ b/include/pybind11/detail/typeid.h
@@ -18,8 +18,8 @@
 
 #include "common.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 /// Erase all occurrences of a substring
 inline void erase_all(std::string &string, const std::string &search) {
     for (size_t pos = 0;;) {
@@ -43,7 +43,7 @@
 #endif
     detail::erase_all(name, "pybind11::");
 }
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /// Return a string representation of a C++ type
 template <typename T> static std::string type_id() {
@@ -52,4 +52,4 @@
     return name;
 }
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/eigen.h b/include/pybind11/eigen.h
index d963d96..e8c6f63 100644
--- a/include/pybind11/eigen.h
+++ b/include/pybind11/eigen.h
@@ -41,14 +41,14 @@
 // of matrices seems highly undesirable.
 static_assert(EIGEN_VERSION_AT_LEAST(3,2,7), "Eigen support in pybind11 requires Eigen >= 3.2.7");
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 // Provide a convenience alias for easier pass-by-ref usage with fully dynamic strides:
 using EigenDStride = Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>;
 template <typename MatrixType> using EigenDRef = Eigen::Ref<MatrixType, 0, EigenDStride>;
 template <typename MatrixType> using EigenDMap = Eigen::Map<MatrixType, 0, EigenDStride>;
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 #if EIGEN_VERSION_AT_LEAST(3,3,0)
 using EigenIndex = Eigen::Index;
@@ -432,7 +432,7 @@
         if (!need_copy) {
             // We don't need a converting copy, but we also need to check whether the strides are
             // compatible with the Ref's stride requirements
-            Array aref = reinterpret_borrow<Array>(src);
+            auto aref = reinterpret_borrow<Array>(src);
 
             if (aref && (!need_writeable || aref.writeable())) {
                 fits = props::conformable(aref);
@@ -539,9 +539,9 @@
 
 template<typename Type>
 struct type_caster<Type, enable_if_t<is_eigen_sparse<Type>::value>> {
-    typedef typename Type::Scalar Scalar;
-    typedef remove_reference_t<decltype(*std::declval<Type>().outerIndexPtr())> StorageIndex;
-    typedef typename Type::Index Index;
+    using Scalar = typename Type::Scalar;
+    using StorageIndex = remove_reference_t<decltype(*std::declval<Type>().outerIndexPtr())>;
+    using Index = typename Type::Index;
     static constexpr bool rowMajor = Type::IsRowMajor;
 
     bool load(handle src, bool) {
@@ -549,11 +549,11 @@
             return false;
 
         auto obj = reinterpret_borrow<object>(src);
-        object sparse_module = module::import("scipy.sparse");
+        object sparse_module = module_::import("scipy.sparse");
         object matrix_type = sparse_module.attr(
             rowMajor ? "csr_matrix" : "csc_matrix");
 
-        if (!obj.get_type().is(matrix_type)) {
+        if (!type::handle_of(obj).is(matrix_type)) {
             try {
                 obj = matrix_type(obj);
             } catch (const error_already_set &) {
@@ -580,7 +580,7 @@
     static handle cast(const Type &src, return_value_policy /* policy */, handle /* parent */) {
         const_cast<Type&>(src).makeCompressed();
 
-        object matrix_type = module::import("scipy.sparse").attr(
+        object matrix_type = module_::import("scipy.sparse").attr(
             rowMajor ? "csr_matrix" : "csc_matrix");
 
         array data(src.nonZeros(), src.valuePtr());
@@ -597,8 +597,8 @@
             + npy_format_descriptor<Scalar>::name + _("]"));
 };
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
 
 #if defined(__GNUG__) || defined(__clang__)
 #  pragma GCC diagnostic pop
diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h
index f814c78..204aaf9 100644
--- a/include/pybind11/embed.h
+++ b/include/pybind11/embed.h
@@ -45,29 +45,28 @@
             });
         }
  \endrst */
-#define PYBIND11_EMBEDDED_MODULE(name, variable)                              \
-    static void PYBIND11_CONCAT(pybind11_init_, name)(pybind11::module &);    \
-    static PyObject PYBIND11_CONCAT(*pybind11_init_wrapper_, name)() {        \
-        auto m = pybind11::module(PYBIND11_TOSTRING(name));                   \
-        try {                                                                 \
-            PYBIND11_CONCAT(pybind11_init_, name)(m);                         \
-            return m.ptr();                                                   \
-        } catch (pybind11::error_already_set &e) {                            \
-            PyErr_SetString(PyExc_ImportError, e.what());                     \
-            return nullptr;                                                   \
-        } catch (const std::exception &e) {                                   \
-            PyErr_SetString(PyExc_ImportError, e.what());                     \
-            return nullptr;                                                   \
-        }                                                                     \
-    }                                                                         \
-    PYBIND11_EMBEDDED_MODULE_IMPL(name)                                       \
-    pybind11::detail::embedded_module name(PYBIND11_TOSTRING(name),           \
-                               PYBIND11_CONCAT(pybind11_init_impl_, name));   \
-    void PYBIND11_CONCAT(pybind11_init_, name)(pybind11::module &variable)
+#define PYBIND11_EMBEDDED_MODULE(name, variable)                                \
+    static ::pybind11::module_::module_def                                      \
+        PYBIND11_CONCAT(pybind11_module_def_, name);                            \
+    static void PYBIND11_CONCAT(pybind11_init_, name)(::pybind11::module_ &);   \
+    static PyObject PYBIND11_CONCAT(*pybind11_init_wrapper_, name)() {          \
+        auto m = ::pybind11::module_::create_extension_module(                  \
+            PYBIND11_TOSTRING(name), nullptr,                                   \
+            &PYBIND11_CONCAT(pybind11_module_def_, name));                      \
+        try {                                                                   \
+            PYBIND11_CONCAT(pybind11_init_, name)(m);                           \
+            return m.ptr();                                                     \
+        } PYBIND11_CATCH_INIT_EXCEPTIONS                                        \
+    }                                                                           \
+    PYBIND11_EMBEDDED_MODULE_IMPL(name)                                         \
+    ::pybind11::detail::embedded_module PYBIND11_CONCAT(pybind11_module_, name) \
+                              (PYBIND11_TOSTRING(name),                         \
+                               PYBIND11_CONCAT(pybind11_init_impl_, name));     \
+    void PYBIND11_CONCAT(pybind11_init_, name)(::pybind11::module_ &variable)
 
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /// Python 2.7/3.x compatible version of `PyImport_AppendInittab` and error checks.
 struct embedded_module {
@@ -86,7 +85,7 @@
     }
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /** \rst
     Initialize the Python interpreter. No other pybind11 or CPython API functions can be
@@ -108,7 +107,7 @@
     Py_InitializeEx(init_signal_handlers ? 1 : 0);
 
     // Make .py files in the working directory available by default
-    module::import("sys").attr("path").cast<list>().append(".");
+    module_::import("sys").attr("path").cast<list>().append(".");
 }
 
 /** \rst
@@ -199,4 +198,4 @@
     bool is_valid = true;
 };
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/eval.h b/include/pybind11/eval.h
index ea85ba1..fa6b8af 100644
--- a/include/pybind11/eval.h
+++ b/include/pybind11/eval.h
@@ -13,7 +13,23 @@
 
 #include "pybind11.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+inline void ensure_builtins_in_globals(object &global) {
+    #if PY_VERSION_HEX < 0x03080000
+        // Make sure `global` has a `__builtins__` entry: Python's exec/eval
+        // (2 and 3) add the `builtins` module there when it is missing, and
+        // Python 3.8 made PyRun_String behave similarly. Do the same for older
+        // versions, for consistency.
+        if (!global.contains("__builtins__"))
+            global["__builtins__"] = module_::import(PYBIND11_BUILTINS_MODULE);
+    #else
+        (void) global;  // nothing to do in this branch; marks the parameter as used
+    #endif
+}
+
+PYBIND11_NAMESPACE_END(detail)
 
 enum eval_mode {
     /// Evaluate a string containing an isolated expression
@@ -31,6 +47,8 @@
     if (!local)
         local = global;
 
+    detail::ensure_builtins_in_globals(global);
+
     /* PyRun_String does not accept a PyObject / encoding specifier,
        this seems to be the only alternative */
     std::string buffer = "# -*- coding: utf-8 -*-\n" + (std::string) expr;
@@ -52,7 +70,7 @@
 template <eval_mode mode = eval_expr, size_t N>
 object eval(const char (&s)[N], object global = globals(), object local = object()) {
     /* Support raw string literals by removing common leading whitespace */
-    auto expr = (s[0] == '\n') ? str(module::import("textwrap").attr("dedent")(s))
+    auto expr = (s[0] == '\n') ? str(module_::import("textwrap").attr("dedent")(s))
                                : str(s);
     return eval<mode>(expr, global, local);
 }
@@ -66,11 +84,27 @@
     eval<eval_statements>(s, global, local);
 }
 
+#if defined(PYPY_VERSION) && PY_VERSION_HEX >= 0x03000000
+template <eval_mode mode = eval_statements>
+object eval_file(str, object, object) {
+    pybind11_fail("eval_file not supported in PyPy3. Use eval");
+}
+template <eval_mode mode = eval_statements>
+object eval_file(str, object) {
+    pybind11_fail("eval_file not supported in PyPy3. Use eval");
+}
+template <eval_mode mode = eval_statements>
+object eval_file(str) {
+    pybind11_fail("eval_file not supported in PyPy3. Use eval");
+}
+#else
 template <eval_mode mode = eval_statements>
 object eval_file(str fname, object global = globals(), object local = object()) {
     if (!local)
         local = global;
 
+    detail::ensure_builtins_in_globals(global);
+
     int start;
     switch (mode) {
         case eval_expr:             start = Py_eval_input;   break;
@@ -113,5 +147,6 @@
         throw error_already_set();
     return reinterpret_steal<object>(result);
 }
+#endif
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/functional.h b/include/pybind11/functional.h
index f8bda64..92c17dc 100644
--- a/include/pybind11/functional.h
+++ b/include/pybind11/functional.h
@@ -12,8 +12,8 @@
 #include "pybind11.h"
 #include <functional>
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 template <typename Return, typename... Args>
 struct type_caster<std::function<Return(Args...)>> {
@@ -58,7 +58,10 @@
         struct func_handle {
             function f;
             func_handle(function&& f_) : f(std::move(f_)) {}
-            func_handle(const func_handle&) = default;
+            func_handle(const func_handle& f_) {
+                gil_scoped_acquire acq;
+                f = f_.f;
+            }
             ~func_handle() {
                 gil_scoped_acquire acq;
                 function kill_f(std::move(f));
@@ -97,5 +100,5 @@
                                + make_caster<retval_type>::name + _("]"));
 };
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/iostream.h b/include/pybind11/iostream.h
index c43b7c9..5e9a814 100644
--- a/include/pybind11/iostream.h
+++ b/include/pybind11/iostream.h
@@ -17,8 +17,8 @@
 #include <memory>
 #include <iostream>
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 // Buffer that writes to Python instead of C++
 class pythonbuf : public std::streambuf {
@@ -30,7 +30,7 @@
     object pywrite;
     object pyflush;
 
-    int overflow(int c) {
+    int overflow(int c) override {
         if (!traits_type::eq_int_type(c, traits_type::eof())) {
             *pptr() = traits_type::to_char_type(c);
             pbump(1);
@@ -38,7 +38,10 @@
         return sync() == 0 ? traits_type::not_eof(c) : traits_type::eof();
     }
 
-    int sync() {
+    // Non-virtual helper, so that the destructor can flush without making a
+    // virtual call. If the rare MSVC test failure shows up with this version,
+    // then this should be simplified to a fully qualified call to sync().
+    int _sync() {
         if (pbase() != pptr()) {
             // This subtraction cannot be negative, so dropping the sign
             str line(pbase(), static_cast<size_t>(pptr() - pbase()));
@@ -54,6 +57,10 @@
         return 0;
     }
 
+    int sync() override {
+        return _sync();
+    }
+
 public:
 
     pythonbuf(object pyostream, size_t buffer_size = 1024)
@@ -67,12 +74,12 @@
     pythonbuf(pythonbuf&&) = default;
 
     /// Sync before destroy
-    ~pythonbuf() {
-        sync();
+    ~pythonbuf() override {
+        _sync();
     }
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 
 /** \rst
@@ -95,7 +102,7 @@
     .. code-block:: cpp
 
         {
-            py::scoped_ostream_redirect output{std::cerr, py::module::import("sys").attr("stderr")};
+            py::scoped_ostream_redirect output{std::cerr, py::module_::import("sys").attr("stderr")};
             std::cerr << "Hello, World!";
         }
  \endrst */
@@ -108,7 +115,7 @@
 public:
     scoped_ostream_redirect(
             std::ostream &costream = std::cout,
-            object pyostream = module::import("sys").attr("stdout"))
+            object pyostream = module_::import("sys").attr("stdout"))
         : costream(costream), buffer(pyostream) {
         old = costream.rdbuf(&buffer);
     }
@@ -139,12 +146,12 @@
 public:
     scoped_estream_redirect(
             std::ostream &costream = std::cerr,
-            object pyostream = module::import("sys").attr("stderr"))
+            object pyostream = module_::import("sys").attr("stderr"))
         : scoped_ostream_redirect(costream,pyostream) {}
 };
 
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 // Class to redirect output as a context manager. C++ backend.
 class OstreamRedirect {
@@ -170,7 +177,7 @@
     }
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /** \rst
     This is a helper function to add a C++ redirect context manager to Python
@@ -199,11 +206,11 @@
             m.noisy_function_with_error_printing()
 
  \endrst */
-inline class_<detail::OstreamRedirect> add_ostream_redirect(module m, std::string name = "ostream_redirect") {
+inline class_<detail::OstreamRedirect> add_ostream_redirect(module_ m, std::string name = "ostream_redirect") {
     return class_<detail::OstreamRedirect>(m, name.c_str(), module_local())
         .def(init<bool,bool>(), arg("stdout")=true, arg("stderr")=true)
         .def("__enter__", &detail::OstreamRedirect::enter)
         .def("__exit__", [](detail::OstreamRedirect &self_, args) { self_.exit(); });
 }
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/numpy.h b/include/pybind11/numpy.h
index ba41a22..019f568 100644
--- a/include/pybind11/numpy.h
+++ b/include/pybind11/numpy.h
@@ -20,6 +20,7 @@
 #include <sstream>
 #include <string>
 #include <functional>
+#include <type_traits>
 #include <utility>
 #include <vector>
 #include <typeindex>
@@ -33,13 +34,18 @@
    whole npy_intp / ssize_t / Py_intptr_t business down to just ssize_t for all size
    and dimension types (e.g. shape, strides, indexing), instead of inflicting this
    upon the library user. */
-static_assert(sizeof(ssize_t) == sizeof(Py_intptr_t), "ssize_t != Py_intptr_t");
+static_assert(sizeof(::pybind11::ssize_t) == sizeof(Py_intptr_t), "ssize_t != Py_intptr_t");
+static_assert(std::is_signed<Py_intptr_t>::value, "Py_intptr_t must be signed");
+// We can now reinterpret_cast between py::ssize_t and Py_intptr_t (MSVC + PyPy care)
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 class array; // Forward declaration
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+template <> struct handle_type_name<array> { static constexpr auto name = _("numpy.ndarray"); };
+
 template <typename type, typename SFINAE = void> struct npy_format_descriptor;
 
 struct PyArrayDescr_Proxy {
@@ -178,8 +184,9 @@
     unsigned int (*PyArray_GetNDArrayCFeatureVersion_)();
     PyObject *(*PyArray_DescrFromType_)(int);
     PyObject *(*PyArray_NewFromDescr_)
-        (PyTypeObject *, PyObject *, int, Py_intptr_t *,
-         Py_intptr_t *, void *, int, PyObject *);
+        (PyTypeObject *, PyObject *, int, Py_intptr_t const *,
+         Py_intptr_t const *, void *, int, PyObject *);
+    // Unused. Not removed because that affects ABI of the class.
     PyObject *(*PyArray_DescrNewFromType_)(int);
     int (*PyArray_CopyInto_)(PyObject *, PyObject *);
     PyObject *(*PyArray_NewCopy_)(PyObject *, int);
@@ -190,9 +197,10 @@
     PyObject *(*PyArray_FromAny_) (PyObject *, PyObject *, int, int, int, PyObject *);
     int (*PyArray_DescrConverter_) (PyObject *, PyObject **);
     bool (*PyArray_EquivTypes_) (PyObject *, PyObject *);
-    int (*PyArray_GetArrayParamsFromObject_)(PyObject *, PyObject *, char, PyObject **, int *,
-                                             Py_ssize_t *, PyObject **, PyObject *);
+    int (*PyArray_GetArrayParamsFromObject_)(PyObject *, PyObject *, unsigned char, PyObject **, int *,
+                                             Py_intptr_t *, PyObject **, PyObject *);
     PyObject *(*PyArray_Squeeze_)(PyObject *);
+    // Unused. Not removed because that affects ABI of the class.
     int (*PyArray_SetBaseObject_)(PyObject *, PyObject *);
     PyObject* (*PyArray_Resize_)(PyObject*, PyArray_Dims*, int, int);
 private:
@@ -208,7 +216,7 @@
         API_PyArray_CopyInto = 82,
         API_PyArray_NewCopy = 85,
         API_PyArray_NewFromDescr = 94,
-        API_PyArray_DescrNewFromType = 9,
+        API_PyArray_DescrNewFromType = 96,
         API_PyArray_DescrConverter = 174,
         API_PyArray_EquivTypes = 182,
         API_PyArray_GetArrayParamsFromObject = 278,
@@ -217,7 +225,7 @@
     };
 
     static npy_api lookup() {
-        module m = module::import("numpy.core.multiarray");
+        module_ m = module_::import("numpy.core.multiarray");
         auto c = m.attr("_ARRAY_API");
 #if PY_MAJOR_VERSION >= 3
         void **api_ptr = (void **) PyCapsule_GetPointer(c.ptr(), NULL);
@@ -276,7 +284,7 @@
 template <typename T> struct is_complex<std::complex<T>> : std::true_type { };
 
 template <typename T> struct array_info_scalar {
-    typedef T type;
+    using type = T;
     static constexpr bool is_array = false;
     static constexpr bool is_empty = false;
     static constexpr auto extents = _("");
@@ -323,6 +331,12 @@
     satisfies_none_of<T, std::is_reference, std::is_array, is_std_array, std::is_arithmetic, is_complex, std::is_enum>
 >;
 
+// Replacement for std::is_pod (deprecated in C++20): combines is_standard_layout and is_trivial
+template <typename T> using is_pod = all_of<
+    std::is_standard_layout<T>,
+    std::is_trivial<T>
+>;
+
 template <ssize_t Dim = 0, typename Strides> ssize_t byte_offset_unsafe(const Strides &) { return 0; }
 template <ssize_t Dim = 0, typename Strides, typename... Ix>
 ssize_t byte_offset_unsafe(const Strides &strides, ssize_t i, Ix... index) {
@@ -414,6 +428,10 @@
     using ConstBase::ConstBase;
     using ConstBase::Dynamic;
 public:
+    // Bring in const-qualified versions from base class
+    using ConstBase::operator();
+    using ConstBase::operator[];
+
     /// Mutable, unchecked access to data at the given indices.
     template <typename... Ix> T& operator()(Ix... index) {
         static_assert(ssize_t{sizeof...(Ix)} == Dims || Dynamic,
@@ -439,7 +457,7 @@
 template <typename T, ssize_t Dim>
 struct type_caster<unchecked_mutable_reference<T, Dim>> : type_caster<unchecked_reference<T, Dim>> {};
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 class dtype : public object {
 public:
@@ -496,7 +514,7 @@
 
 private:
     static object _dtype_from_pep3118() {
-        static PyObject *obj = module::import("numpy.core._internal")
+        static PyObject *obj = module_::import("numpy.core._internal")
             .attr("_dtype_from_pep3118").cast<object>().release().ptr();
         return reinterpret_borrow<object>(obj);
     }
@@ -545,7 +563,7 @@
         forcecast = detail::npy_api::NPY_ARRAY_FORCECAST_
     };
 
-    array() : array({{0}}, static_cast<const double *>(nullptr)) {}
+    array() : array(0, static_cast<const double *>(nullptr)) {}
 
     using ShapeContainer = detail::any_container<ssize_t>;
     using StridesContainer = detail::any_container<ssize_t>;
@@ -555,7 +573,7 @@
           const void *ptr = nullptr, handle base = handle()) {
 
         if (strides->empty())
-            *strides = c_strides(*shape, dt.itemsize());
+            *strides = detail::c_strides(*shape, dt.itemsize());
 
         auto ndim = shape->size();
         if (ndim != strides->size())
@@ -574,7 +592,10 @@
 
         auto &api = detail::npy_api::get();
         auto tmp = reinterpret_steal<object>(api.PyArray_NewFromDescr_(
-            api.PyArray_Type_, descr.release().ptr(), (int) ndim, shape->data(), strides->data(),
+            api.PyArray_Type_, descr.release().ptr(), (int) ndim,
+            // Use reinterpret_cast for PyPy on Windows (remove if fixed, checked on 7.3.1)
+            reinterpret_cast<Py_intptr_t*>(shape->data()),
+            reinterpret_cast<Py_intptr_t*>(strides->data()),
             const_cast<void *>(ptr), flags, nullptr));
         if (!tmp)
             throw error_already_set();
@@ -606,8 +627,8 @@
     template <typename T>
     explicit array(ssize_t count, const T *ptr, handle base = handle()) : array({count}, {}, ptr, base) { }
 
-    explicit array(const buffer_info &info)
-    : array(pybind11::dtype(info), info.shape, info.strides, info.ptr) { }
+    explicit array(const buffer_info &info, handle base = handle())
+    : array(pybind11::dtype(info), info.shape, info.strides, info.ptr, base) { }
 
     /// Array descriptor (dtype)
     pybind11::dtype dtype() const {
@@ -746,10 +767,12 @@
     /// then resize will succeed only if it makes a reshape, i.e. original size doesn't change
     void resize(ShapeContainer new_shape, bool refcheck = true) {
         detail::npy_api::PyArray_Dims d = {
-            new_shape->data(), int(new_shape->size())
+            // Use reinterpret_cast for PyPy on Windows (remove if fixed, checked on 7.3.1)
+            reinterpret_cast<Py_intptr_t*>(new_shape->data()),
+            int(new_shape->size())
         };
         // try to resize, set ordering param to -1 cause it's not used anyway
-        object new_array = reinterpret_steal<object>(
+        auto new_array = reinterpret_steal<object>(
             detail::npy_api::get().PyArray_Resize_(m_ptr, &d, int(refcheck), -1)
         );
         if (!new_array) throw error_already_set();
@@ -783,25 +806,6 @@
             throw std::domain_error("array is not writeable");
     }
 
-    // Default, C-style strides
-    static std::vector<ssize_t> c_strides(const std::vector<ssize_t> &shape, ssize_t itemsize) {
-        auto ndim = shape.size();
-        std::vector<ssize_t> strides(ndim, itemsize);
-        if (ndim > 0)
-            for (size_t i = ndim - 1; i > 0; --i)
-                strides[i - 1] = strides[i] * shape[i];
-        return strides;
-    }
-
-    // F-style strides; default when constructing an array_t with `ExtraFlags & f_style`
-    static std::vector<ssize_t> f_strides(const std::vector<ssize_t> &shape, ssize_t itemsize) {
-        auto ndim = shape.size();
-        std::vector<ssize_t> strides(ndim, itemsize);
-        for (size_t i = 1; i < ndim; ++i)
-            strides[i] = strides[i - 1] * shape[i - 1];
-        return strides;
-    }
-
     template<typename... Ix> void check_dimensions(Ix... index) const {
         check_dimensions_impl(ssize_t(0), shape(), ssize_t(index)...);
     }
@@ -853,17 +857,19 @@
         if (!m_ptr) throw error_already_set();
     }
 
-    explicit array_t(const buffer_info& info) : array(info) { }
+    explicit array_t(const buffer_info& info, handle base = handle()) : array(info, base) { }
 
     array_t(ShapeContainer shape, StridesContainer strides, const T *ptr = nullptr, handle base = handle())
         : array(std::move(shape), std::move(strides), ptr, base) { }
 
     explicit array_t(ShapeContainer shape, const T *ptr = nullptr, handle base = handle())
         : array_t(private_ctor{}, std::move(shape),
-                ExtraFlags & f_style ? f_strides(*shape, itemsize()) : c_strides(*shape, itemsize()),
+                ExtraFlags & f_style
+                ? detail::f_strides(*shape, itemsize())
+                : detail::c_strides(*shape, itemsize()),
                 ptr, base) { }
 
-    explicit array_t(size_t count, const T *ptr = nullptr, handle base = handle())
+    explicit array_t(ssize_t count, const T *ptr = nullptr, handle base = handle())
         : array({count}, {}, ptr, base) { }
 
     constexpr ssize_t itemsize() const {
@@ -929,7 +935,8 @@
     static bool check_(handle h) {
         const auto &api = detail::npy_api::get();
         return api.PyArray_Check_(h.ptr())
-               && api.PyArray_EquivTypes_(detail::array_proxy(h.ptr())->descr, dtype::of<T>().ptr());
+               && api.PyArray_EquivTypes_(detail::array_proxy(h.ptr())->descr, dtype::of<T>().ptr())
+               && detail::check_flags(h.ptr(), ExtraFlags & (array::c_style | array::f_style));
     }
 
 protected:
@@ -976,7 +983,7 @@
     }
 };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 template <typename T, int ExtraFlags>
 struct pyobject_caster<array_t<T, ExtraFlags>> {
     using type = array_t<T, ExtraFlags>;
@@ -1007,14 +1014,14 @@
 template <typename T>
 struct npy_format_descriptor_name<T, enable_if_t<std::is_integral<T>::value>> {
     static constexpr auto name = _<std::is_same<T, bool>::value>(
-        _("bool"), _<std::is_signed<T>::value>("int", "uint") + _<sizeof(T)*8>()
+        _("bool"), _<std::is_signed<T>::value>("numpy.int", "numpy.uint") + _<sizeof(T)*8>()
     );
 };
 
 template <typename T>
 struct npy_format_descriptor_name<T, enable_if_t<std::is_floating_point<T>::value>> {
     static constexpr auto name = _<std::is_same<T, float>::value || std::is_same<T, double>::value>(
-        _("float") + _<sizeof(T)*8>(), _("longdouble")
+        _("numpy.float") + _<sizeof(T)*8>(), _("numpy.longdouble")
     );
 };
 
@@ -1022,7 +1029,7 @@
 struct npy_format_descriptor_name<T, enable_if_t<is_complex<T>::value>> {
     static constexpr auto name = _<std::is_same<typename T::value_type, float>::value
                                    || std::is_same<typename T::value_type, double>::value>(
-        _("complex") + _<sizeof(typename T::value_type)*16>(), _("longcomplex")
+        _("numpy.complex") + _<sizeof(typename T::value_type)*16>(), _("numpy.longcomplex")
     );
 };
 
@@ -1218,7 +1225,7 @@
 #define PYBIND11_MAP_NEXT0(test, next, ...) next PYBIND11_MAP_OUT
 #define PYBIND11_MAP_NEXT1(test, next) PYBIND11_MAP_NEXT0 (test, next, 0)
 #define PYBIND11_MAP_NEXT(test, next)  PYBIND11_MAP_NEXT1 (PYBIND11_MAP_GET_END test, next)
-#ifdef _MSC_VER // MSVC is not as eager to expand macros, hence this workaround
+#if defined(_MSC_VER) && !defined(__clang__) // MSVC is not as eager to expand macros, hence this workaround
 #define PYBIND11_MAP_LIST_NEXT1(test, next) \
     PYBIND11_EVAL0 (PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0))
 #else
@@ -1240,7 +1247,7 @@
         (::std::vector<::pybind11::detail::field_descriptor> \
          {PYBIND11_MAP_LIST (PYBIND11_FIELD_DESCRIPTOR, Type, __VA_ARGS__)})
 
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
 #define PYBIND11_MAP2_LIST_NEXT1(test, next) \
     PYBIND11_EVAL0 (PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0))
 #else
@@ -1264,19 +1271,6 @@
 
 #endif // __CLION_IDE__
 
-template  <class T>
-using array_iterator = typename std::add_pointer<T>::type;
-
-template <class T>
-array_iterator<T> array_begin(const buffer_info& buffer) {
-    return array_iterator<T>(reinterpret_cast<T*>(buffer.ptr));
-}
-
-template <class T>
-array_iterator<T> array_end(const buffer_info& buffer) {
-    return array_iterator<T>(reinterpret_cast<T*>(buffer.ptr) + buffer.size);
-}
-
 class common_iterator {
 public:
     using container_type = std::vector<ssize_t>;
@@ -1290,7 +1284,7 @@
         m_strides.back() = static_cast<value_type>(strides.back());
         for (size_type i = m_strides.size() - 1; i != 0; --i) {
             size_type j = i - 1;
-            value_type s = static_cast<value_type>(shape[i]);
+            auto s = static_cast<value_type>(shape[i]);
             m_strides[j] = strides[j] + m_strides[i] - strides[i] * s;
         }
     }
@@ -1468,7 +1462,7 @@
     using call_type = remove_reference_t<T>;
     // Is this a vectorized argument?
     static constexpr bool vectorize =
-        satisfies_any_of<call_type, std::is_arithmetic, is_complex, std::is_pod>::value &&
+        satisfies_any_of<call_type, std::is_arithmetic, is_complex, is_pod>::value &&
         satisfies_none_of<call_type, std::is_pointer, std::is_array, is_std_array, std::is_enum>::value &&
         (!std::is_reference<T>::value ||
          (std::is_lvalue_reference<T>::value && std::is_const<call_type>::value));
@@ -1476,9 +1470,66 @@
     using type = conditional_t<vectorize, array_t<remove_cv_t<call_type>, array::forcecast>, T>;
 };
 
+
+// py::vectorize when a return type is present: results are collected in an array_t<Return>
+template <typename Func, typename Return, typename... Args>
+struct vectorize_returned_array {
+    using Type = array_t<Return>;
+    // Allocate the output for `shape`: f_style when the broadcast was f_trivial, else a plain array_t<Return>
+    static Type create(broadcast_trivial trivial, const std::vector<ssize_t> &shape) {
+        if (trivial == broadcast_trivial::f_trivial)
+            return array_t<Return, array::f_style>(shape);
+        else
+            return array_t<Return>(shape);
+    }
+    // Writable pointer to the output array's data
+    static Return *mutable_data(Type &array) {
+        return array.mutable_data();
+    }
+    // Single call: invoke f once and return its result to the caller
+    static Return call(Func &f, Args &... args) {
+        return f(args...);
+    }
+    // Element-wise call: invoke f and store its result at out[i]
+    static void call(Return *out, size_t i, Func &f, Args &... args) {
+        out[i] = f(args...);
+    }
+};
+
+// py::vectorize when a return type is not present: nothing is stored and `none` is returned
+template <typename Func, typename... Args>
+struct vectorize_returned_array<Func, void, Args...> {
+    using Type = none;
+    // No output array is needed; both arguments are ignored
+    static Type create(broadcast_trivial, const std::vector<ssize_t> &) {
+        return none();
+    }
+    // There is no output buffer, so the data pointer is always null
+    static void *mutable_data(Type &) {
+        return nullptr;
+    }
+    // Single call: invoke f for its side effects and return an empty void_type
+    static detail::void_type call(Func &f, Args &... args) {
+        f(args...);
+        return {};
+    }
+    // Element-wise call: the output pointer and index are unused, f is only invoked
+    static void call(void *, size_t, Func &f, Args &... args) {
+        f(args...);
+    }
+};
+
+
 template <typename Func, typename Return, typename... Args>
 struct vectorize_helper {
+
+// NVCC for some reason breaks if NVectorized is private
+#ifdef __CUDACC__
+public:
+#else
 private:
+#endif
+
     static constexpr size_t N = sizeof...(Args);
     static constexpr size_t NVectorized = constexpr_sum(vectorize_arg<Args>::vectorize...);
     static_assert(NVectorized >= 1,
@@ -1503,6 +1554,8 @@
     using arg_call_types = std::tuple<typename vectorize_arg<Args>::call_type...>;
     template <size_t Index> using param_n_t = typename std::tuple_element<Index, arg_call_types>::type;
 
+    using returned_array = vectorize_returned_array<Func, Return, Args...>;
+
     // Runs a vectorized function given arguments tuple and three index sequences:
     //     - Index is the full set of 0 ... (N-1) argument indices;
     //     - VIndex is the subset of argument indices with vectorized parameters, letting us access
@@ -1526,7 +1579,7 @@
         ssize_t nd = 0;
         std::vector<ssize_t> shape(0);
         auto trivial = broadcast(buffers, nd, shape);
-        size_t ndim = (size_t) nd;
+        auto ndim = (size_t) nd;
 
         size_t size = std::accumulate(shape.begin(), shape.end(), (size_t) 1, std::multiplies<size_t>());
 
@@ -1534,20 +1587,19 @@
         // not wrapped in an array).
         if (size == 1 && ndim == 0) {
             PYBIND11_EXPAND_SIDE_EFFECTS(params[VIndex] = buffers[BIndex].ptr);
-            return cast(f(*reinterpret_cast<param_n_t<Index> *>(params[Index])...));
+            return cast(returned_array::call(f, *reinterpret_cast<param_n_t<Index> *>(params[Index])...));
         }
 
-        array_t<Return> result;
-        if (trivial == broadcast_trivial::f_trivial) result = array_t<Return, array::f_style>(shape);
-        else result = array_t<Return>(shape);
+        auto result = returned_array::create(trivial, shape);
 
         if (size == 0) return std::move(result);
 
         /* Call the function */
+        auto mutable_data = returned_array::mutable_data(result);
         if (trivial == broadcast_trivial::non_trivial)
-            apply_broadcast(buffers, params, result, i_seq, vi_seq, bi_seq);
+            apply_broadcast(buffers, params, mutable_data, size, shape, i_seq, vi_seq, bi_seq);
         else
-            apply_trivial(buffers, params, result.mutable_data(), size, i_seq, vi_seq, bi_seq);
+            apply_trivial(buffers, params, mutable_data, size, i_seq, vi_seq, bi_seq);
 
         return std::move(result);
     }
@@ -1570,7 +1622,7 @@
         }};
 
         for (size_t i = 0; i < size; ++i) {
-            out[i] = f(*reinterpret_cast<param_n_t<Index> *>(params[Index])...);
+            returned_array::call(out, i, f, *reinterpret_cast<param_n_t<Index> *>(params[Index])...);
             for (auto &x : vecparams) x.first += x.second;
         }
     }
@@ -1578,19 +1630,18 @@
     template <size_t... Index, size_t... VIndex, size_t... BIndex>
     void apply_broadcast(std::array<buffer_info, NVectorized> &buffers,
                          std::array<void *, N> &params,
-                         array_t<Return> &output_array,
+                         Return *out,
+                         size_t size,
+                         const std::vector<ssize_t> &output_shape,
                          index_sequence<Index...>, index_sequence<VIndex...>, index_sequence<BIndex...>) {
 
-        buffer_info output = output_array.request();
-        multi_array_iterator<NVectorized> input_iter(buffers, output.shape);
+        multi_array_iterator<NVectorized> input_iter(buffers, output_shape);
 
-        for (array_iterator<Return> iter = array_begin<Return>(output), end = array_end<Return>(output);
-             iter != end;
-             ++iter, ++input_iter) {
+        for (size_t i = 0; i < size; ++i, ++input_iter) {
             PYBIND11_EXPAND_SIDE_EFFECTS((
                 params[VIndex] = input_iter.template data<BIndex>()
             ));
-            *iter = f(*reinterpret_cast<param_n_t<Index> *>(std::get<Index>(params))...);
+            returned_array::call(out, i, f, *reinterpret_cast<param_n_t<Index> *>(std::get<Index>(params))...);
         }
     }
 };
@@ -1605,7 +1656,7 @@
     static constexpr auto name = _("numpy.ndarray[") + npy_format_descriptor<T>::name + _("]");
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 // Vanilla pointer vectorizer:
 template <typename Return, typename... Args>
@@ -1635,7 +1686,7 @@
     return Helper(std::mem_fn(f));
 }
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
 
 #if defined(_MSC_VER)
 #pragma warning(pop)
diff --git a/include/pybind11/operators.h b/include/pybind11/operators.h
index b3dd62c..086cb4c 100644
--- a/include/pybind11/operators.h
+++ b/include/pybind11/operators.h
@@ -18,8 +18,8 @@
 #  pragma warning(disable: 4127) // warning C4127: Conditional expression is constant
 #endif
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /// Enumeration with all supported operator types
 enum op_id : int {
@@ -147,6 +147,9 @@
 PYBIND11_INPLACE_OPERATOR(ior,      operator|=,   l |= r)
 PYBIND11_UNARY_OPERATOR(neg,        operator-,    -l)
 PYBIND11_UNARY_OPERATOR(pos,        operator+,    +l)
+// WARNING: This usage of `abs` should only be done for existing STL overloads.
+// Adding overloads directly into the `std::` namespace is advised against:
+// https://en.cppreference.com/w/cpp/language/extending_std
 PYBIND11_UNARY_OPERATOR(abs,        abs,          std::abs(l))
 PYBIND11_UNARY_OPERATOR(hash,       hash,         std::hash<L>()(l))
 PYBIND11_UNARY_OPERATOR(invert,     operator~,    (~l))
@@ -157,11 +160,13 @@
 #undef PYBIND11_BINARY_OPERATOR
 #undef PYBIND11_INPLACE_OPERATOR
 #undef PYBIND11_UNARY_OPERATOR
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 using detail::self;
+// Add named operators so that they are accessible via `py::`.
+using detail::hash;
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
 
 #if defined(_MSC_VER)
 #  pragma warning(pop)
diff --git a/include/pybind11/options.h b/include/pybind11/options.h
index cc1e1f6..d74db1c 100644
--- a/include/pybind11/options.h
+++ b/include/pybind11/options.h
@@ -11,7 +11,7 @@
 
 #include "detail/common.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 class options {
 public:
@@ -62,4 +62,4 @@
     state previous_state;
 };
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h
index d95d61f..e2ddda0 100644
--- a/include/pybind11/pybind11.h
+++ b/include/pybind11/pybind11.h
@@ -29,6 +29,7 @@
 #  pragma warning(disable: 4996) // warning C4996: The POSIX name for this item is deprecated. Instead, use the ISO C and C++ conformant name
 #  pragma warning(disable: 4702) // warning C4702: unreachable code
 #  pragma warning(disable: 4522) // warning C4522: multiple assignment operators specified
+#  pragma warning(disable: 4505) // warning C4505: 'PySlice_GetIndicesEx': unreferenced local function has been removed (PyPy only)
 #elif defined(__GNUG__) && !defined(__clang__)
 #  pragma GCC diagnostic push
 #  pragma GCC diagnostic ignored "-Wunused-but-set-parameter"
@@ -46,16 +47,21 @@
 #include "detail/class.h"
 #include "detail/init.h"
 
+#include <memory>
+#include <vector>
+#include <string>
+#include <utility>
+
 #if defined(__GNUG__) && !defined(__clang__)
 #  include <cxxabi.h>
 #endif
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 /// Wraps an arbitrary C++ function/method/lambda function/.. into a callable Python object
 class cpp_function : public function {
 public:
-    cpp_function() { }
+    cpp_function() = default;
     cpp_function(std::nullptr_t) { }
 
     /// Construct a cpp_function from a vanilla function pointer
@@ -72,16 +78,34 @@
                    (detail::function_signature_t<Func> *) nullptr, extra...);
     }
 
-    /// Construct a cpp_function from a class method (non-const)
+    /// Construct a cpp_function from a class method (non-const, no ref-qualifier)
     template <typename Return, typename Class, typename... Arg, typename... Extra>
     cpp_function(Return (Class::*f)(Arg...), const Extra&... extra) {
+        initialize([f](Class *c, Arg... args) -> Return { return (c->*f)(std::forward<Arg>(args)...); },
+                   (Return (*) (Class *, Arg...)) nullptr, extra...);
+    }
+
+    /// Construct a cpp_function from a class method (non-const, lvalue ref-qualifier)
+    /// A copy of the overload for non-const functions without explicit ref-qualifier
+    /// but with an added `&`.
+    template <typename Return, typename Class, typename... Arg, typename... Extra>
+    cpp_function(Return (Class::*f)(Arg...)&, const Extra&... extra) {
         initialize([f](Class *c, Arg... args) -> Return { return (c->*f)(args...); },
                    (Return (*) (Class *, Arg...)) nullptr, extra...);
     }
 
-    /// Construct a cpp_function from a class method (const)
+    /// Construct a cpp_function from a class method (const, no ref-qualifier)
     template <typename Return, typename Class, typename... Arg, typename... Extra>
     cpp_function(Return (Class::*f)(Arg...) const, const Extra&... extra) {
+        initialize([f](const Class *c, Arg... args) -> Return { return (c->*f)(std::forward<Arg>(args)...); },
+                   (Return (*)(const Class *, Arg ...)) nullptr, extra...);
+    }
+
+    /// Construct a cpp_function from a class method (const, lvalue ref-qualifier)
+    /// A copy of the overload for const functions without explicit ref-qualifier
+    /// but with an added `&`.
+    template <typename Return, typename Class, typename... Arg, typename... Extra>
+    cpp_function(Return (Class::*f)(Arg...) const&, const Extra&... extra) {
         initialize([f](const Class *c, Arg... args) -> Return { return (c->*f)(args...); },
                    (Return (*)(const Class *, Arg ...)) nullptr, extra...);
     }
@@ -147,7 +171,7 @@
             /* Get a pointer to the capture object */
             auto data = (sizeof(capture) <= sizeof(call.func.data)
                          ? &call.func.data : call.func.data[0]);
-            capture *cap = const_cast<capture *>(reinterpret_cast<const capture *>(data));
+            auto *cap = const_cast<capture *>(reinterpret_cast<const capture *>(data));
 
             /* Override policy for rvalues -- usually to enforce rvp::move on an rvalue */
             return_value_policy policy = return_value_policy_override<Return>::policy(call.func.policy);
@@ -168,6 +192,16 @@
         /* Process any user-provided function attributes */
         process_attributes<Extra...>::init(extra..., rec);
 
+        {
+            constexpr bool has_kw_only_args = any_of<std::is_same<kw_only, Extra>...>::value,
+                           has_pos_only_args = any_of<std::is_same<pos_only, Extra>...>::value,
+                           has_args = any_of<std::is_same<args, Args>...>::value,
+                           has_arg_annotations = any_of<is_keyword<Extra>...>::value;
+            static_assert(has_arg_annotations || !has_kw_only_args, "py::kw_only requires the use of argument annotations");
+            static_assert(has_arg_annotations || !has_pos_only_args, "py::pos_only requires the use of argument annotations (for docstrings and aligning the annotations to the argument)");
+            static_assert(!(has_args && has_kw_only_args), "py::kw_only cannot be combined with a py::args argument");
+        }
+
         /* Generate a readable signature describing the function's arguments and return value types */
         static constexpr auto signature = _("(") + cast_in::arg_names + _(") -> ") + cast_out::name;
         PYBIND11_DESCR_CONSTEXPR auto types = decltype(signature)::types();
@@ -202,14 +236,14 @@
             if (a.descr)
                 a.descr = strdup(a.descr);
             else if (a.value)
-                a.descr = strdup(a.value.attr("__repr__")().cast<std::string>().c_str());
+                a.descr = strdup(repr(a.value).cast<std::string>().c_str());
         }
 
         rec->is_constructor = !strcmp(rec->name, "__init__") || !strcmp(rec->name, "__setstate__");
 
 #if !defined(NDEBUG) && !defined(PYBIND11_DISABLE_NEW_STYLE_INIT_WARNING)
         if (rec->is_constructor && !rec->is_new_style_constructor) {
-            const auto class_name = std::string(((PyTypeObject *) rec->scope.ptr())->tp_name);
+            const auto class_name = detail::get_fully_qualified_tp_name((PyTypeObject *) rec->scope.ptr());
             const auto func_name = std::string(rec->name);
             PyErr_WarnEx(
                 PyExc_FutureWarning,
@@ -231,7 +265,10 @@
                 // Write arg name for everything except *args and **kwargs.
                 if (*(pc + 1) == '*')
                     continue;
-
+                // Separator for keyword-only arguments, placed before the kw
+                // arguments start
+                if (rec->nargs_kw_only > 0 && arg_index + rec->nargs_kw_only == args)
+                    signature += "*, ";
                 if (arg_index < rec->args.size() && rec->args[arg_index].name) {
                     signature += rec->args[arg_index].name;
                 } else if (arg_index == 0 && rec->is_method) {
@@ -246,6 +283,10 @@
                     signature += " = ";
                     signature += rec->args[arg_index].descr;
                 }
+                // Separator for positional-only arguments (placed after the
+                // argument, rather than before like *)
+                if (rec->nargs_pos_only > 0 && (arg_index + 1) == rec->nargs_pos_only)
+                    signature += ", /";
                 arg_index++;
             } else if (c == '%') {
                 const std::type_info *t = types[type_index++];
@@ -271,6 +312,7 @@
                 signature += c;
             }
         }
+
         if (arg_index != args || types[type_index] != nullptr)
             pybind11_fail("Internal error while parsing type signature (2)");
 
@@ -331,10 +373,9 @@
             if (!m_ptr)
                 pybind11_fail("cpp_function::cpp_function(): Could not allocate function object");
         } else {
-            /* Append at the end of the overload chain */
+            /* Append at the beginning or end of the overload chain */
             m_ptr = rec->sibling.ptr();
             inc_ref();
-            chain_start = chain;
             if (chain->is_method != rec->is_method)
                 pybind11_fail("overloading a method with both static and instance methods is not supported; "
                     #if defined(NDEBUG)
@@ -344,9 +385,22 @@
                         std::string(pybind11::str(rec->scope.attr("__name__"))) + "." + std::string(rec->name) + signature
                     #endif
                 );
-            while (chain->next)
-                chain = chain->next;
-            chain->next = rec;
+
+            if (rec->prepend) {
+                // Beginning of chain; we need to replace the capsule's current head-of-the-chain
+                // pointer with this one, then make this one point to the previous head of the
+                // chain.
+                chain_start = rec;
+                rec->next = chain;
+                auto rec_capsule = reinterpret_borrow<capsule>(((PyCFunctionObject *) m_ptr)->m_self);
+                rec_capsule.set_pointer(rec);
+            } else {
+                // Or end of chain (normal behavior)
+                chain_start = chain;
+                while (chain->next)
+                    chain = chain->next;
+                chain->next = rec;
+            }
         }
 
         std::string signatures;
@@ -384,7 +438,7 @@
         }
 
         /* Install docstring */
-        PyCFunctionObject *func = (PyCFunctionObject *) m_ptr;
+        auto *func = (PyCFunctionObject *) m_ptr;
         if (func->m_ml->ml_doc)
             std::free(const_cast<char *>(func->m_ml->ml_doc));
         func->m_ml->ml_doc = strdup(signatures.c_str());
@@ -399,6 +453,12 @@
 
     /// When a cpp_function is GCed, release any memory allocated by pybind11
     static void destruct(detail::function_record *rec) {
+        // If on Python 3.9, check the interpreter "MICRO" (patch) version.
+        // If this is running on 3.9.0, we have to work around a bug.
+        #if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9
+            static bool is_zero = Py_GetVersion()[4] == '0';
+        #endif
+
         while (rec) {
             detail::function_record *next = rec->next;
             if (rec->free_data)
@@ -413,7 +473,15 @@
             }
             if (rec->def) {
                 std::free(const_cast<char *>(rec->def->ml_doc));
-                delete rec->def;
+                // Python 3.9.0 decref's these in the wrong order; rec->def may still be
+                // read afterwards. If loaded on 3.9.0, let it leak (use Python 3.9.1 at runtime to fix)
+                // See https://github.com/python/cpython/pull/22670
+                #if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9
+                    if (!is_zero)
+                        delete rec->def;
+                #else
+                    delete rec->def;
+                #endif
             }
             delete rec;
             rec = next;
@@ -429,7 +497,7 @@
                               *it = overloads;
 
         /* Need to know how many arguments + keyword arguments there are to pick the right overload */
-        const size_t n_args_in = (size_t) PyTuple_GET_SIZE(args_in);
+        const auto n_args_in = (size_t) PyTuple_GET_SIZE(args_in);
 
         handle parent = n_args_in > 0 ? PyTuple_GET_ITEM(args_in, 0) : nullptr,
                result = PYBIND11_TRY_NEXT_OVERLOAD;
@@ -483,15 +551,16 @@
                  */
 
                 const function_record &func = *it;
-                size_t pos_args = func.nargs;    // Number of positional arguments that we need
-                if (func.has_args) --pos_args;   // (but don't count py::args
-                if (func.has_kwargs) --pos_args; //  or py::kwargs)
+                size_t num_args = func.nargs;    // Number of positional arguments that we need
+                if (func.has_args) --num_args;   // (but don't count py::args
+                if (func.has_kwargs) --num_args; //  or py::kwargs)
+                size_t pos_args = num_args - func.nargs_kw_only;
 
                 if (!func.has_args && n_args_in > pos_args)
-                    continue; // Too many arguments for this overload
+                    continue; // Too many positional arguments for this overload
 
                 if (n_args_in < pos_args && func.args.size() < pos_args)
-                    continue; // Not enough arguments given, and not enough defaults to fill in the blanks
+                    continue; // Not enough positional arguments given, and not enough defaults to fill in the blanks
 
                 function_call call(func, parent);
 
@@ -506,7 +575,7 @@
                         self_value_and_holder.type->dealloc(self_value_and_holder);
 
                     call.init_self = PyTuple_GET_ITEM(args_in, 0);
-                    call.args.push_back(reinterpret_cast<PyObject *>(&self_value_and_holder));
+                    call.args.emplace_back(reinterpret_cast<PyObject *>(&self_value_and_holder));
                     call.args_convert.push_back(false);
                     ++args_copied;
                 }
@@ -534,16 +603,36 @@
                 // We'll need to copy this if we steal some kwargs for defaults
                 dict kwargs = reinterpret_borrow<dict>(kwargs_in);
 
+                // 1.5. Fill in any missing pos_only args from defaults if they exist
+                if (args_copied < func.nargs_pos_only) {
+                    for (; args_copied < func.nargs_pos_only; ++args_copied) {
+                        const auto &arg_rec = func.args[args_copied];
+                        handle value;
+
+                        if (arg_rec.value) {
+                            value = arg_rec.value;
+                        }
+                        if (value) {
+                            call.args.push_back(value);
+                            call.args_convert.push_back(arg_rec.convert);
+                        } else
+                            break;
+                    }
+
+                    if (args_copied < func.nargs_pos_only)
+                        continue; // Not enough defaults to fill the positional arguments
+                }
+
                 // 2. Check kwargs and, failing that, defaults that may help complete the list
-                if (args_copied < pos_args) {
+                if (args_copied < num_args) {
                     bool copied_kwargs = false;
 
-                    for (; args_copied < pos_args; ++args_copied) {
-                        const auto &arg = func.args[args_copied];
+                    for (; args_copied < num_args; ++args_copied) {
+                        const auto &arg_rec = func.args[args_copied];
 
                         handle value;
-                        if (kwargs_in && arg.name)
-                            value = PyDict_GetItemString(kwargs.ptr(), arg.name);
+                        if (kwargs_in && arg_rec.name)
+                            value = PyDict_GetItemString(kwargs.ptr(), arg_rec.name);
 
                         if (value) {
                             // Consume a kwargs value
@@ -551,25 +640,29 @@
                                 kwargs = reinterpret_steal<dict>(PyDict_Copy(kwargs.ptr()));
                                 copied_kwargs = true;
                             }
-                            PyDict_DelItemString(kwargs.ptr(), arg.name);
-                        } else if (arg.value) {
-                            value = arg.value;
+                            PyDict_DelItemString(kwargs.ptr(), arg_rec.name);
+                        } else if (arg_rec.value) {
+                            value = arg_rec.value;
+                        }
+
+                        if (!arg_rec.none && value.is_none()) {
+                            break;
                         }
 
                         if (value) {
                             call.args.push_back(value);
-                            call.args_convert.push_back(arg.convert);
+                            call.args_convert.push_back(arg_rec.convert);
                         }
                         else
                             break;
                     }
 
-                    if (args_copied < pos_args)
+                    if (args_copied < num_args)
                         continue; // Not enough arguments, defaults, or kwargs to fill the positional arguments
                 }
 
                 // 3. Check everything was consumed (unless we have a kwargs arg)
-                if (kwargs && kwargs.size() > 0 && !func.has_kwargs)
+                if (kwargs && !kwargs.empty() && !func.has_kwargs)
                     continue; // Unconsumed kwargs, but no py::kwargs argument to accept them
 
                 // 4a. If we have a py::args argument, create a new tuple with leftovers
@@ -667,7 +760,7 @@
         } catch (error_already_set &e) {
             e.restore();
             return nullptr;
-#if defined(__GNUG__) && !defined(__clang__)
+#ifdef __GLIBCXX__
         } catch ( abi::__forced_unwind& ) {
             throw;
 #endif
@@ -749,18 +842,27 @@
             for (size_t ti = overloads->is_constructor ? 1 : 0; ti < args_.size(); ++ti) {
                 if (!some_args) some_args = true;
                 else msg += ", ";
-                msg += pybind11::repr(args_[ti]);
+                try {
+                    msg += pybind11::repr(args_[ti]);
+                } catch (const error_already_set&) {
+                    msg += "<repr raised Error>";
+                }
             }
             if (kwargs_in) {
                 auto kwargs = reinterpret_borrow<dict>(kwargs_in);
-                if (kwargs.size() > 0) {
+                if (!kwargs.empty()) {
                     if (some_args) msg += "; ";
                     msg += "kwargs: ";
                     bool first = true;
                     for (auto kwarg : kwargs) {
                         if (first) first = false;
                         else msg += ", ";
-                        msg += pybind11::str("{}={!r}").format(kwarg.first, kwarg.second);
+                        msg += pybind11::str("{}=").format(kwarg.first);
+                        try {
+                            msg += pybind11::repr(kwarg.second);
+                        } catch (const error_already_set&) {
+                            msg += "<repr raised Error>";
+                        }
                     }
                 }
             }
@@ -786,27 +888,18 @@
 };
 
 /// Wrapper for Python extension modules
-class module : public object {
+class module_ : public object {
 public:
-    PYBIND11_OBJECT_DEFAULT(module, object, PyModule_Check)
+    PYBIND11_OBJECT_DEFAULT(module_, object, PyModule_Check)
 
     /// Create a new top-level Python module with the given name and docstring
-    explicit module(const char *name, const char *doc = nullptr) {
-        if (!options::show_user_defined_docstrings()) doc = nullptr;
+    PYBIND11_DEPRECATED("Use PYBIND11_MODULE or module_::create_extension_module instead")
+    explicit module_(const char *name, const char *doc = nullptr) {
 #if PY_MAJOR_VERSION >= 3
-        PyModuleDef *def = new PyModuleDef();
-        std::memset(def, 0, sizeof(PyModuleDef));
-        def->m_name = name;
-        def->m_doc = doc;
-        def->m_size = -1;
-        Py_INCREF(def);
-        m_ptr = PyModule_Create(def);
+        *this = create_extension_module(name, doc, new PyModuleDef());
 #else
-        m_ptr = Py_InitModule3(name, nullptr, doc);
+        *this = create_extension_module(name, doc, nullptr);
 #endif
-        if (m_ptr == nullptr)
-            pybind11_fail("Internal error in module::module()");
-        inc_ref();
     }
 
     /** \rst
@@ -815,7 +908,7 @@
         details on the ``Extra&& ... extra`` argument, see section :ref:`extras`.
     \endrst */
     template <typename Func, typename... Extra>
-    module &def(const char *name_, Func &&f, const Extra& ... extra) {
+    module_ &def(const char *name_, Func &&f, const Extra& ... extra) {
         cpp_function func(std::forward<Func>(f), name(name_), scope(*this),
                           sibling(getattr(*this, name_, none())), extra...);
         // NB: allow overwriting here because cpp_function sets up a chain with the intention of
@@ -830,14 +923,14 @@
 
         .. code-block:: cpp
 
-            py::module m("example", "pybind11 example plugin");
-            py::module m2 = m.def_submodule("sub", "A submodule of 'example'");
-            py::module m3 = m2.def_submodule("subsub", "A submodule of 'example.sub'");
+            py::module_ m("example", "pybind11 example plugin");
+            py::module_ m2 = m.def_submodule("sub", "A submodule of 'example'");
+            py::module_ m3 = m2.def_submodule("subsub", "A submodule of 'example.sub'");
     \endrst */
-    module def_submodule(const char *name, const char *doc = nullptr) {
+    module_ def_submodule(const char *name, const char *doc = nullptr) {
         std::string full_name = std::string(PyModule_GetName(m_ptr))
             + std::string(".") + std::string(name);
-        auto result = reinterpret_borrow<module>(PyImport_AddModule(full_name.c_str()));
+        auto result = reinterpret_borrow<module_>(PyImport_AddModule(full_name.c_str()));
         if (doc && options::show_user_defined_docstrings())
             result.attr("__doc__") = pybind11::str(doc);
         attr(name) = result;
@@ -845,11 +938,11 @@
     }
 
     /// Import and return a module or throws `error_already_set`.
-    static module import(const char *name) {
+    static module_ import(const char *name) {
         PyObject *obj = PyImport_ImportModule(name);
         if (!obj)
             throw error_already_set();
-        return reinterpret_steal<module>(obj);
+        return reinterpret_steal<module_>(obj);
     }
 
     /// Reload the module or throws `error_already_set`.
@@ -857,14 +950,16 @@
         PyObject *obj = PyImport_ReloadModule(ptr());
         if (!obj)
             throw error_already_set();
-        *this = reinterpret_steal<module>(obj);
+        *this = reinterpret_steal<module_>(obj);
     }
 
-    // Adds an object to the module using the given name.  Throws if an object with the given name
-    // already exists.
-    //
-    // overwrite should almost always be false: attempting to overwrite objects that pybind11 has
-    // established will, in most cases, break things.
+    /** \rst
+        Adds an object to the module using the given name.  Throws if an object with the given name
+        already exists.
+
+        ``overwrite`` should almost always be false: attempting to overwrite objects that pybind11 has
+        established will, in most cases, break things.
+    \endrst */
     PYBIND11_NOINLINE void add_object(const char *name, handle obj, bool overwrite = false) {
         if (!overwrite && hasattr(*this, name))
             pybind11_fail("Error during initialization: multiple incompatible definitions with name \"" +
@@ -872,25 +967,71 @@
 
         PyModule_AddObject(ptr(), name, obj.inc_ref().ptr() /* steals a reference */);
     }
+
+#if PY_MAJOR_VERSION >= 3
+    using module_def = PyModuleDef;
+#else
+    struct module_def {};
+#endif
+
+    /** \rst
+        Create a new top-level module that can be used as the main module of a C extension.
+
+        For Python 3, ``def`` should point to a statically allocated module_def.
+        For Python 2, ``def`` can be a nullptr and is completely ignored.
+    \endrst */
+    static module_ create_extension_module(const char *name, const char *doc, module_def *def) {
+#if PY_MAJOR_VERSION >= 3
+        // module_def is PyModuleDef
+        def = new (def) PyModuleDef {  // Placement new (not an allocation).
+            /* m_base */     PyModuleDef_HEAD_INIT,
+            /* m_name */     name,
+            /* m_doc */      options::show_user_defined_docstrings() ? doc : nullptr,
+            /* m_size */     -1,
+            /* m_methods */  nullptr,
+            /* m_slots */    nullptr,
+            /* m_traverse */ nullptr,
+            /* m_clear */    nullptr,
+            /* m_free */     nullptr
+        };
+        auto m = PyModule_Create(def);
+#else
+        // Ignore module_def *def; only necessary for Python 3
+        (void) def;
+        auto m = Py_InitModule3(name, nullptr, options::show_user_defined_docstrings() ? doc : nullptr);
+#endif
+        if (m == nullptr) {
+            if (PyErr_Occurred())
+                throw error_already_set();
+            pybind11_fail("Internal error in module_::create_extension_module()");
+        }
+        // TODO: Should be reinterpret_steal for Python 3, but Python also steals it again when returned from PyInit_...
+        //       For Python 2, reinterpret_borrow is correct.
+        return reinterpret_borrow<module_>(m);
+    }
 };
 
+// When inside a namespace (or anywhere as long as it's not the first item on a line),
+// C++20 allows "module" to be used. This is provided for backward compatibility, and for
+// simplicity, if someone wants to use py::module for example, that is perfectly safe.
+using module = module_;
+
 /// \ingroup python_builtins
 /// Return a dictionary representing the global variables in the current execution frame,
 /// or ``__main__.__dict__`` if there is no frame (usually when the interpreter is embedded).
 inline dict globals() {
     PyObject *p = PyEval_GetGlobals();
-    return reinterpret_borrow<dict>(p ? p : module::import("__main__").attr("__dict__").ptr());
+    return reinterpret_borrow<dict>(p ? p : module_::import("__main__").attr("__dict__").ptr());
 }
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 /// Generic support for creating new Python heap types
 class generic_type : public object {
-    template <typename...> friend class class_;
 public:
     PYBIND11_OBJECT_DEFAULT(generic_type, object, PyType_Check)
 protected:
     void initialize(const type_record &rec) {
-        if (rec.scope && hasattr(rec.scope, rec.name))
+        if (rec.scope && hasattr(rec.scope, "__dict__") && rec.scope.attr("__dict__").contains(rec.name))
             pybind11_fail("generic_type: cannot initialize type \"" + std::string(rec.name) +
                           "\": an object with that name is already defined");
 
@@ -954,13 +1095,13 @@
     void install_buffer_funcs(
             buffer_info *(*get_buffer)(PyObject *, void *),
             void *get_buffer_data) {
-        PyHeapTypeObject *type = (PyHeapTypeObject*) m_ptr;
+        auto *type = (PyHeapTypeObject*) m_ptr;
         auto tinfo = detail::get_type_info(&type->ht_type);
 
         if (!type->ht_type.tp_as_buffer)
             pybind11_fail(
                 "To be able to register buffer protocol support for the type '" +
-                std::string(tinfo->type->tp_name) +
+                get_fully_qualified_tp_name(tinfo->type) +
                 "' the associated class<>(..) invocation must "
                 "include the pybind11::buffer_protocol() annotation!");
 
@@ -1020,7 +1161,14 @@
     #endif
 }
 
-NAMESPACE_END(detail)
+inline void add_class_method(object& cls, const char *name_, const cpp_function &cf) {
+    cls.attr(cf.name()) = cf;
+    if (strcmp(name_, "__eq__") == 0 && !cls.attr("__dict__").contains("__hash__")) {
+      cls.attr("__hash__") = none();
+    }
+}
+
+PYBIND11_NAMESPACE_END(detail)
 
 /// Given a pointer to a member function, cast it to its `Derived` version.
 /// Forward everything else unchanged.
@@ -1117,7 +1265,7 @@
     class_ &def(const char *name_, Func&& f, const Extra&... extra) {
         cpp_function cf(method_adaptor<type>(std::forward<Func>(f)), name(name_), is_method(*this),
                         sibling(getattr(*this, name_, none())), extra...);
-        attr(cf.name()) = cf;
+        add_class_method(*this, name_, cf);
         return *this;
     }
 
@@ -1167,15 +1315,20 @@
         return *this;
     }
 
-    template <typename Func> class_& def_buffer(Func &&func) {
+    template <typename Func>
+    class_& def_buffer(Func &&func) {
         struct capture { Func func; };
-        capture *ptr = new capture { std::forward<Func>(func) };
+        auto *ptr = new capture { std::forward<Func>(func) };
         install_buffer_funcs([](PyObject *obj, void *ptr) -> buffer_info* {
             detail::make_caster<type> caster;
             if (!caster.load(obj, false))
                 return nullptr;
             return new buffer_info(((capture *) ptr)->func(caster));
         }, ptr);
+        weakref(m_ptr, cpp_function([ptr](handle wr) {
+            delete ptr;
+            wr.dec_ref();
+        })).release();
         return *this;
     }
 
@@ -1354,6 +1507,13 @@
 
     /// Deallocates an instance; via holder, if constructed; otherwise via operator delete.
     static void dealloc(detail::value_and_holder &v_h) {
+        // We could be deallocating because we are cleaning up after a Python exception.
+        // If so, the Python error indicator will be set. We need to clear that before
+        // running the destructor, in case the destructor code calls more Python.
+        // If we don't, the Python API will exit with an exception, and pybind11 will
+        // throw error_already_set from the C++ destructor which is forbidden and triggers
+        // std::terminate().
+        error_scope scope;
         if (v_h.holder_constructed()) {
             v_h.holder<holder_type>().~holder_type();
             v_h.set_holder_constructed(false);
@@ -1398,7 +1558,17 @@
     return {std::forward<GetState>(g), std::forward<SetState>(s)};
 }
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+inline str enum_name(handle arg) {
+    dict entries = arg.get_type().attr("__entries");
+    for (auto kv : entries) {
+        if (handle(kv.second[int_(0)]).equal(arg))
+            return pybind11::str(kv.first);
+    }
+    return "???";
+}
+
 struct enum_base {
     enum_base(handle base, handle parent) : m_base(base), m_parent(parent) { }
 
@@ -1408,29 +1578,21 @@
         auto static_property = handle((PyObject *) get_internals().static_property_type);
 
         m_base.attr("__repr__") = cpp_function(
-            [](handle arg) -> str {
-                handle type = arg.get_type();
+            [](object arg) -> str {
+                handle type = type::handle_of(arg);
                 object type_name = type.attr("__name__");
-                dict entries = type.attr("__entries");
-                for (const auto &kv : entries) {
-                    object other = kv.second[int_(0)];
-                    if (other.equal(arg))
-                        return pybind11::str("{}.{}").format(type_name, kv.first);
-                }
-                return pybind11::str("{}.???").format(type_name);
-            }, is_method(m_base)
+                return pybind11::str("<{}.{}: {}>").format(type_name, enum_name(arg), int_(arg));
+            }, name("__repr__"), is_method(m_base)
         );
 
-        m_base.attr("name") = property(cpp_function(
+        m_base.attr("name") = property(cpp_function(&enum_name, name("name"), is_method(m_base)));
+
+        m_base.attr("__str__") = cpp_function(
             [](handle arg) -> str {
-                dict entries = arg.get_type().attr("__entries");
-                for (const auto &kv : entries) {
-                    if (handle(kv.second[int_(0)]).equal(arg))
-                        return pybind11::str(kv.first);
-                }
-                return "???";
-            }, is_method(m_base)
-        ));
+                object type_name = type::handle_of(arg).attr("__name__");
+                return pybind11::str("{}.{}").format(type_name, enum_name(arg));
+            }, name("name"), is_method(m_base)
+        );
 
         m_base.attr("__doc__") = static_property(cpp_function(
             [](handle arg) -> std::string {
@@ -1439,7 +1601,7 @@
                 if (((PyTypeObject *) arg.ptr())->tp_doc)
                     docstring += std::string(((PyTypeObject *) arg.ptr())->tp_doc) + "\n\n";
                 docstring += "Members:";
-                for (const auto &kv : entries) {
+                for (auto kv : entries) {
                     auto key = std::string(pybind11::str(kv.first));
                     auto comment = kv.second[int_(1)];
                     docstring += "\n\n  " + key;
@@ -1447,26 +1609,26 @@
                         docstring += " : " + (std::string) pybind11::str(comment);
                 }
                 return docstring;
-            }
+            }, name("__doc__")
         ), none(), none(), "");
 
         m_base.attr("__members__") = static_property(cpp_function(
             [](handle arg) -> dict {
                 dict entries = arg.attr("__entries"), m;
-                for (const auto &kv : entries)
+                for (auto kv : entries)
                     m[kv.first] = kv.second[int_(0)];
                 return m;
-            }), none(), none(), ""
+            }, name("__members__")), none(), none(), ""
         );
 
         #define PYBIND11_ENUM_OP_STRICT(op, expr, strict_behavior)                     \
             m_base.attr(op) = cpp_function(                                            \
                 [](object a, object b) {                                               \
-                    if (!a.get_type().is(b.get_type()))                                \
+                    if (!type::handle_of(a).is(type::handle_of(b)))                    \
                         strict_behavior;                                               \
                     return expr;                                                       \
                 },                                                                     \
-                is_method(m_base))
+                name(op), is_method(m_base), arg("other"))
 
         #define PYBIND11_ENUM_OP_CONV(op, expr)                                        \
             m_base.attr(op) = cpp_function(                                            \
@@ -1474,7 +1636,7 @@
                     int_ a(a_), b(b_);                                                 \
                     return expr;                                                       \
                 },                                                                     \
-                is_method(m_base))
+                name(op), is_method(m_base), arg("other"))
 
         #define PYBIND11_ENUM_OP_CONV_LHS(op, expr)                                    \
             m_base.attr(op) = cpp_function(                                            \
@@ -1482,7 +1644,7 @@
                     int_ a(a_);                                                        \
                     return expr;                                                       \
                 },                                                                     \
-                is_method(m_base))
+                name(op), is_method(m_base), arg("other"))
 
         if (is_convertible) {
             PYBIND11_ENUM_OP_CONV_LHS("__eq__", !b.is_none() &&  a.equal(b));
@@ -1500,7 +1662,7 @@
                 PYBIND11_ENUM_OP_CONV("__xor__",  a ^  b);
                 PYBIND11_ENUM_OP_CONV("__rxor__", a ^  b);
                 m_base.attr("__invert__") = cpp_function(
-                    [](object arg) { return ~(int_(arg)); }, is_method(m_base));
+                    [](object arg) { return ~(int_(arg)); }, name("__invert__"), is_method(m_base));
             }
         } else {
             PYBIND11_ENUM_OP_STRICT("__eq__",  int_(a).equal(int_(b)), return false);
@@ -1520,11 +1682,11 @@
         #undef PYBIND11_ENUM_OP_CONV
         #undef PYBIND11_ENUM_OP_STRICT
 
-        object getstate = cpp_function(
-            [](object arg) { return int_(arg); }, is_method(m_base));
+        m_base.attr("__getstate__") = cpp_function(
+            [](object arg) { return int_(arg); }, name("__getstate__"), is_method(m_base));
 
-        m_base.attr("__getstate__") = getstate;
-        m_base.attr("__hash__") = getstate;
+        m_base.attr("__hash__") = cpp_function(
+            [](object arg) { return int_(arg); }, name("__hash__"), is_method(m_base));
     }
 
     PYBIND11_NOINLINE void value(char const* name_, object value, const char *doc = nullptr) {
@@ -1541,7 +1703,7 @@
 
     PYBIND11_NOINLINE void export_values() {
         dict entries = m_base.attr("__entries");
-        for (const auto &kv : entries)
+        for (auto kv : entries)
             m_parent.attr(kv.first) = kv.second[int_(0)];
     }
 
@@ -1549,7 +1711,7 @@
     handle m_parent;
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /// Binds C++ enumerations and enumeration classes to Python
 template <typename Type> class enum_ : public class_<Type> {
@@ -1568,7 +1730,7 @@
         constexpr bool is_convertible = std::is_convertible<Type, Scalar>::value;
         m_base.init(is_arithmetic, is_convertible);
 
-        def(init([](Scalar i) { return static_cast<Type>(i); }));
+        def(init([](Scalar i) { return static_cast<Type>(i); }), arg("value"));
         def("__int__", [](Type value) { return (Scalar) value; });
         #if PY_MAJOR_VERSION < 3
             def("__long__", [](Type value) { return (Scalar) value; });
@@ -1577,10 +1739,12 @@
             def("__index__", [](Type value) { return (Scalar) value; });
         #endif
 
-        cpp_function setstate(
-            [](Type &value, Scalar arg) { value = static_cast<Type>(arg); },
-            is_method(*this));
-        attr("__setstate__") = setstate;
+        attr("__setstate__") = cpp_function(
+            [](detail::value_and_holder &v_h, Scalar arg) {
+                detail::initimpl::setstate<Base>(v_h, static_cast<Type>(arg),
+                        Py_TYPE(v_h.inst) != v_h.type->type); },
+            detail::is_new_style_constructor(),
+            pybind11::name("__setstate__"), is_method(*this), arg("state"));
     }
 
     /// Export enumeration entries into the parent scope
@@ -1599,7 +1763,7 @@
     detail::enum_base m_base;
 };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 
 inline void keep_alive_impl(handle nurse, handle patient) {
@@ -1669,7 +1833,7 @@
     bool first_or_done;
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /// Makes a python iterator from a first and past-the-end C++ InputIterator.
 template <return_value_policy Policy = return_value_policy::reference_internal,
@@ -1678,7 +1842,7 @@
           typename ValueType = decltype(*std::declval<Iterator>()),
           typename... Extra>
 iterator make_iterator(Iterator first, Sentinel last, Extra &&... extra) {
-    typedef detail::iterator_state<Iterator, Sentinel, false, Policy> state;
+    using state = detail::iterator_state<Iterator, Sentinel, false, Policy>;
 
     if (!detail::get_type_info(typeid(state), false)) {
         class_<state>(handle(), "iterator", pybind11::module_local())
@@ -1707,7 +1871,7 @@
           typename KeyType = decltype((*std::declval<Iterator>()).first),
           typename... Extra>
 iterator make_key_iterator(Iterator first, Sentinel last, Extra &&... extra) {
-    typedef detail::iterator_state<Iterator, Sentinel, true, Policy> state;
+    using state = detail::iterator_state<Iterator, Sentinel, true, Policy>;
 
     if (!detail::get_type_info(typeid(state), false)) {
         class_<state>(handle(), "iterator", pybind11::module_local())
@@ -1786,11 +1950,11 @@
 class exception : public object {
 public:
     exception() = default;
-    exception(handle scope, const char *name, PyObject *base = PyExc_Exception) {
+    exception(handle scope, const char *name, handle base = PyExc_Exception) {
         std::string full_name = scope.attr("__name__").cast<std::string>() +
                                 std::string(".") + name;
-        m_ptr = PyErr_NewException(const_cast<char *>(full_name.c_str()), base, NULL);
-        if (hasattr(scope, name))
+        m_ptr = PyErr_NewException(const_cast<char *>(full_name.c_str()), base.ptr(), NULL);
+        if (hasattr(scope, "__dict__") && scope.attr("__dict__").contains(name))
             pybind11_fail("Error during initialization: multiple incompatible "
                           "definitions with name \"" + std::string(name) + "\"");
         scope.attr(name) = *this;
@@ -1802,13 +1966,13 @@
     }
 };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 // Returns a reference to a function-local static exception object used in the simple
 // register_exception approach below.  (It would be simpler to have the static local variable
 // directly in register_exception, but that makes clang <3.5 segfault - issue #1349).
 template <typename CppException>
 exception<CppException> &get_exception_object() { static exception<CppException> ex; return ex; }
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /**
  * Registers a Python exception in `m` of the given `name` and installs an exception translator to
@@ -1819,7 +1983,7 @@
 template <typename CppException>
 exception<CppException> &register_exception(handle scope,
                                             const char *name,
-                                            PyObject *base = PyExc_Exception) {
+                                            handle base = PyExc_Exception) {
     auto &ex = detail::get_exception_object<CppException>();
     if (!ex) ex = exception<CppException>(scope, name, base);
 
@@ -1834,7 +1998,7 @@
     return ex;
 }
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 PYBIND11_NOINLINE inline void print(tuple args, dict kwargs) {
     auto strings = tuple(args.size());
     for (size_t i = 0; i < args.size(); ++i) {
@@ -1848,7 +2012,7 @@
         file = kwargs["file"].cast<object>();
     } else {
         try {
-            file = module::import("sys").attr("stdout");
+            file = module_::import("sys").attr("stdout");
         } catch (const error_already_set &) {
             /* If print() is called from code that is executed as
                part of garbage collection during interpreter shutdown,
@@ -1865,7 +2029,7 @@
     if (kwargs.contains("flush") && kwargs["flush"].cast<bool>())
         file.attr("flush")();
 }
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 template <return_value_policy policy = return_value_policy::automatic_reference, typename... Args>
 void print(Args &&...args) {
@@ -2028,21 +2192,22 @@
     }
 }
 
-inline function get_type_overload(const void *this_ptr, const detail::type_info *this_type, const char *name)  {
-    handle self = detail::get_object_handle(this_ptr, this_type);
+PYBIND11_NAMESPACE_BEGIN(detail)
+inline function get_type_override(const void *this_ptr, const type_info *this_type, const char *name)  {
+    handle self = get_object_handle(this_ptr, this_type);
     if (!self)
         return function();
-    handle type = self.get_type();
+    handle type = type::handle_of(self);
     auto key = std::make_pair(type.ptr(), name);
 
-    /* Cache functions that aren't overloaded in Python to avoid
+    /* Cache functions that aren't overridden in Python to avoid
        many costly Python dictionary lookups below */
-    auto &cache = detail::get_internals().inactive_overload_cache;
+    auto &cache = get_internals().inactive_override_cache;
     if (cache.find(key) != cache.end())
         return function();
 
-    function overload = getattr(self, name, function());
-    if (overload.is_cpp_function()) {
+    function override = getattr(self, name, function());
+    if (override.is_cpp_function()) {
         cache.insert(key);
         return function();
     }
@@ -2082,34 +2247,36 @@
     Py_DECREF(result);
 #endif
 
-    return overload;
+    return override;
 }
+PYBIND11_NAMESPACE_END(detail)
 
 /** \rst
   Try to retrieve a python method by the provided name from the instance pointed to by the this_ptr.
 
-  :this_ptr: The pointer to the object the overload should be retrieved for. This should be the first
-                   non-trampoline class encountered in the inheritance chain.
-  :name: The name of the overloaded Python method to retrieve.
+  :this_ptr: The pointer to the object the overridden method should be retrieved for. This should be
+             the first non-trampoline class encountered in the inheritance chain.
+  :name: The name of the overridden Python method to retrieve.
   :return: The Python method by this name from the object or an empty function wrapper.
  \endrst */
-template <class T> function get_overload(const T *this_ptr, const char *name) {
+template <class T> function get_override(const T *this_ptr, const char *name) {
     auto tinfo = detail::get_type_info(typeid(T));
-    return tinfo ? get_type_overload(this_ptr, tinfo, name) : function();
+    return tinfo ? detail::get_type_override(this_ptr, tinfo, name) : function();
 }
 
-#define PYBIND11_OVERLOAD_INT(ret_type, cname, name, ...) { \
+#define PYBIND11_OVERRIDE_IMPL(ret_type, cname, name, ...) \
+    do { \
         pybind11::gil_scoped_acquire gil; \
-        pybind11::function overload = pybind11::get_overload(static_cast<const cname *>(this), name); \
-        if (overload) { \
-            auto o = overload(__VA_ARGS__); \
+        pybind11::function override = pybind11::get_override(static_cast<const cname *>(this), name); \
+        if (override) { \
+            auto o = override(__VA_ARGS__); \
             if (pybind11::detail::cast_is_temporary_value_reference<ret_type>::value) { \
-                static pybind11::detail::overload_caster_t<ret_type> caster; \
+                static pybind11::detail::override_caster_t<ret_type> caster; \
                 return pybind11::detail::cast_ref<ret_type>(std::move(o), caster); \
             } \
             else return pybind11::detail::cast_safe<ret_type>(std::move(o)); \
         } \
-    }
+    } while (false)
 
 /** \rst
     Macro to populate the virtual method in the trampoline class. This macro tries to look up a method named 'fn'
@@ -2120,25 +2287,29 @@
     .. code-block:: cpp
 
       std::string toString() override {
-        PYBIND11_OVERLOAD_NAME(
+        PYBIND11_OVERRIDE_NAME(
             std::string, // Return type (ret_type)
             Animal,      // Parent class (cname)
-            toString,    // Name of function in C++ (name)
-            "__str__",   // Name of method in Python (fn)
+            "__str__",   // Name of method in Python (name)
+            toString,    // Name of function in C++ (fn)
         );
       }
 \endrst */
-#define PYBIND11_OVERLOAD_NAME(ret_type, cname, name, fn, ...) \
-    PYBIND11_OVERLOAD_INT(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__) \
-    return cname::fn(__VA_ARGS__)
+#define PYBIND11_OVERRIDE_NAME(ret_type, cname, name, fn, ...) \
+    do { \
+        PYBIND11_OVERRIDE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__); \
+        return cname::fn(__VA_ARGS__); \
+    } while (false)
 
 /** \rst
-    Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERLOAD_NAME`, except that it
-    throws if no overload can be found.
+    Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERRIDE_NAME`, except that it
+    throws if no override can be found.
 \endrst */
-#define PYBIND11_OVERLOAD_PURE_NAME(ret_type, cname, name, fn, ...) \
-    PYBIND11_OVERLOAD_INT(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__) \
-    pybind11::pybind11_fail("Tried to call pure virtual function \"" PYBIND11_STRINGIFY(cname) "::" name "\"");
+#define PYBIND11_OVERRIDE_PURE_NAME(ret_type, cname, name, fn, ...) \
+    do { \
+        PYBIND11_OVERRIDE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__); \
+        pybind11::pybind11_fail("Tried to call pure virtual function \"" PYBIND11_STRINGIFY(cname) "::" name "\""); \
+    } while (false)
 
 /** \rst
     Macro to populate the virtual method in the trampoline class. This macro tries to look up the method
@@ -2155,7 +2326,7 @@
 
           // Trampoline (need one for each virtual function)
           std::string go(int n_times) override {
-              PYBIND11_OVERLOAD_PURE(
+              PYBIND11_OVERRIDE_PURE(
                   std::string, // Return type (ret_type)
                   Animal,      // Parent class (cname)
                   go,          // Name of function in C++ (must match Python name) (fn)
@@ -2164,17 +2335,41 @@
           }
       };
 \endrst */
-#define PYBIND11_OVERLOAD(ret_type, cname, fn, ...) \
-    PYBIND11_OVERLOAD_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__)
+#define PYBIND11_OVERRIDE(ret_type, cname, fn, ...) \
+    PYBIND11_OVERRIDE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__)
 
 /** \rst
-    Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERLOAD`, except that it throws
-    if no overload can be found.
+    Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERRIDE`, except that it throws
+    if no override can be found.
 \endrst */
-#define PYBIND11_OVERLOAD_PURE(ret_type, cname, fn, ...) \
-    PYBIND11_OVERLOAD_PURE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__)
+#define PYBIND11_OVERRIDE_PURE(ret_type, cname, fn, ...) \
+    PYBIND11_OVERRIDE_PURE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__)
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+
+// Deprecated versions
+
+PYBIND11_DEPRECATED("get_type_overload has been deprecated")
+inline function get_type_overload(const void *this_ptr, const detail::type_info *this_type, const char *name) {
+    return detail::get_type_override(this_ptr, this_type, name);
+}
+
+template <class T>
+inline function get_overload(const T *this_ptr, const char *name) {
+    return get_override(this_ptr, name);
+}
+
+#define PYBIND11_OVERLOAD_INT(ret_type, cname, name, ...) \
+    PYBIND11_OVERRIDE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__)
+#define PYBIND11_OVERLOAD_NAME(ret_type, cname, name, fn, ...) \
+    PYBIND11_OVERRIDE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, fn, __VA_ARGS__)
+#define PYBIND11_OVERLOAD_PURE_NAME(ret_type, cname, name, fn, ...) \
+    PYBIND11_OVERRIDE_PURE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, fn, __VA_ARGS__); /* trailing ';' mirrors the ';'-terminated expansion this macro had before the rename */
+#define PYBIND11_OVERLOAD(ret_type, cname, fn, ...) \
+    PYBIND11_OVERRIDE(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), fn, __VA_ARGS__)
+#define PYBIND11_OVERLOAD_PURE(ret_type, cname, fn, ...) \
+    PYBIND11_OVERRIDE_PURE(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), fn, __VA_ARGS__); /* trailing ';' mirrors the ';'-terminated expansion this macro had before the rename */
+
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
 
 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
 #  pragma warning(pop)
diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h
index 4003d69..1010ad7 100644
--- a/include/pybind11/pytypes.h
+++ b/include/pybind11/pytypes.h
@@ -14,14 +14,15 @@
 #include <utility>
 #include <type_traits>
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 /* A few forward declarations */
 class handle; class object;
 class str; class iterator;
+class type;
 struct arg; struct arg_v;
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 class args_proxy;
 inline bool isinstance_generic(handle obj, const std::type_info &tp);
 
@@ -34,7 +35,7 @@
     struct sequence_item;
     struct list_item;
     struct tuple_item;
-}
+} // namespace accessor_policies
 using obj_attr_accessor = accessor<accessor_policies::obj_attr>;
 using str_attr_accessor = accessor<accessor_policies::str_attr>;
 using item_accessor = accessor<accessor_policies::generic_item>;
@@ -151,14 +152,15 @@
 
     /// Return the object's current reference count
     int ref_count() const { return static_cast<int>(Py_REFCNT(derived().ptr())); }
-    /// Return a handle to the Python type object underlying the instance
+
+    // TODO PYBIND11_DEPRECATED("Call py::type::handle_of(h) or py::type::of(h) instead of h.get_type()")
     handle get_type() const;
 
 private:
     bool rich_compare(object_api const &other, int value) const;
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /** \rst
     Holds a reference to a Python object (no reference counting)
@@ -240,7 +242,7 @@
     ~object() { dec_ref(); }
 
     /** \rst
-        Resets the internal pointer to ``nullptr`` without without decreasing the
+        Resets the internal pointer to ``nullptr`` without decreasing the
         object's reference count. The function returns a raw handle to the original
         Python object.
     \endrst */
@@ -311,9 +313,9 @@
 \endrst */
 template <typename T> T reinterpret_steal(handle h) { return {h, object::stolen_t{}}; }
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 inline std::string error_string();
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /// Fetch and hold an error which was already set in Python.  An instance of this is typically
 /// thrown to propagate python-side errors back through C++ which can either be caught manually or
@@ -330,13 +332,27 @@
     error_already_set(const error_already_set &) = default;
     error_already_set(error_already_set &&) = default;
 
-    inline ~error_already_set();
+    inline ~error_already_set() override;
 
     /// Give the currently-held error back to Python, if any.  If there is currently a Python error
     /// already set it is cleared first.  After this call, the current object no longer stores the
     /// error variables (but the `.what()` string is still available).
     void restore() { PyErr_Restore(m_type.release().ptr(), m_value.release().ptr(), m_trace.release().ptr()); }
 
+    /// If it is impossible to raise the currently-held error, such as in a destructor, we can write
+    /// it out using Python's unraisable hook (sys.unraisablehook). The error context should be
+    /// some object whose repr() helps identify the location of the error. Python already knows the
+    /// type and value of the error, so there is no need to repeat that. For example, __func__ could
+    /// be helpful. After this call, the current object no longer stores the error variables,
+    /// and neither does Python.
+    void discard_as_unraisable(object err_context) {
+        restore();
+        PyErr_WriteUnraisable(err_context.ptr());
+    }
+    void discard_as_unraisable(const char *err_context) {
+        discard_as_unraisable(reinterpret_steal<object>(PYBIND11_FROM_STRING(err_context)));
+    }
+
     // Does nothing; provided for backwards compatibility.
     PYBIND11_DEPRECATED("Use of error_already_set.clear() is deprecated")
     void clear() {}
@@ -370,7 +386,7 @@
 template <typename T, detail::enable_if_t<!std::is_base_of<object, T>::value, int> = 0>
 bool isinstance(handle obj) { return detail::isinstance_generic(obj, typeid(T)); }
 
-template <> inline bool isinstance<handle>(handle obj) = delete;
+template <> inline bool isinstance<handle>(handle) = delete;
 template <> inline bool isinstance<object>(handle obj) { return obj.ptr() != nullptr; }
 
 /// \ingroup python_builtins
@@ -446,7 +462,7 @@
 
 /// @} python_builtins
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 inline handle get_function(handle value) {
     if (value) {
 #if PY_MAJOR_VERSION >= 3
@@ -520,7 +536,7 @@
     mutable object cache;
 };
 
-NAMESPACE_BEGIN(accessor_policies)
+PYBIND11_NAMESPACE_BEGIN(accessor_policies)
 struct obj_attr {
     using key_type = object;
     static object get(handle obj, handle key) { return getattr(obj, key); }
@@ -597,7 +613,7 @@
         }
     }
 };
-NAMESPACE_END(accessor_policies)
+PYBIND11_NAMESPACE_END(accessor_policies)
 
 /// STL iterator template used for tuple, list, sequence and dict
 template <typename Policy>
@@ -638,7 +654,7 @@
     friend bool operator<=(const It &a, const It &b) { return !(a > b); }
 };
 
-NAMESPACE_BEGIN(iterator_policies)
+PYBIND11_NAMESPACE_BEGIN(iterator_policies)
 /// Quick proxy class needed to implement ``operator->`` for iterators which can't return pointers
 template <typename T>
 struct arrow_proxy {
@@ -711,7 +727,7 @@
     PyObject *key = nullptr, *value = nullptr;
     ssize_t pos = -1;
 };
-NAMESPACE_END(iterator_policies)
+PYBIND11_NAMESPACE_END(iterator_policies)
 
 #if !defined(PYPY_VERSION)
 using tuple_iterator = generic_iterator<iterator_policies::sequence_fast_readonly>;
@@ -736,9 +752,7 @@
 }
 
 inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
-#if PY_MAJOR_VERSION >= 3
 inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
-#endif
 
 inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
 
@@ -770,7 +784,7 @@
 template <return_value_policy policy = return_value_policy::automatic_reference>
 class unpacking_collector;
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 // TODO: After the deprecated constructors are removed, this macro can be simplified by
 //       inheriting ctors: `using Parent::Parent`. It's not an option right now because
@@ -784,7 +798,9 @@
         Name(handle h, stolen_t) : Parent(h, stolen_t{}) { } \
         PYBIND11_DEPRECATED("Use py::isinstance<py::python_type>(obj) instead") \
         bool check() const { return m_ptr != nullptr && (bool) CheckFun(m_ptr); } \
-        static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); }
+        static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } \
+        template <typename Policy_> \
+        Name(const ::pybind11::detail::accessor<Policy_> &a) : Name(object(a)) { }
 
 #define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \
     PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
@@ -794,15 +810,20 @@
     { if (!m_ptr) throw error_already_set(); } \
     Name(object &&o) \
     : Parent(check_(o) ? o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) \
-    { if (!m_ptr) throw error_already_set(); } \
-    template <typename Policy_> \
-    Name(const ::pybind11::detail::accessor<Policy_> &a) : Name(object(a)) { }
+    { if (!m_ptr) throw error_already_set(); }
+
+#define PYBIND11_OBJECT_CHECK_FAILED(Name, o) \
+    ::pybind11::type_error("Object of type '" + \
+                           ::pybind11::detail::get_fully_qualified_tp_name(Py_TYPE(o.ptr())) + \
+                           "' is not an instance of '" #Name "'")
 
 #define PYBIND11_OBJECT(Name, Parent, CheckFun) \
     PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
     /* This is deliberately not 'explicit' to allow implicit conversion from object: */ \
-    Name(const object &o) : Parent(o) { } \
-    Name(object &&o) : Parent(std::move(o)) { }
+    Name(const object &o) : Parent(o) \
+    { if (o && !check_(o)) throw PYBIND11_OBJECT_CHECK_FAILED(Name, o); } \
+    Name(object &&o) : Parent(std::move(o)) /* 'o' is moved-from in the body: validate *this, not 'o' */ \
+    { if (m_ptr && !check_(*this)) throw PYBIND11_OBJECT_CHECK_FAILED(Name, (*this)); }
 
 #define PYBIND11_OBJECT_DEFAULT(Name, Parent, CheckFun) \
     PYBIND11_OBJECT(Name, Parent, CheckFun) \
@@ -878,6 +899,32 @@
     object value = {};
 };
 
+
+
+class type : public object {
+public:
+    PYBIND11_OBJECT(type, object, PyType_Check)
+
+    /// Return a type handle from a handle or an object
+    static handle handle_of(handle h) { return handle((PyObject*) Py_TYPE(h.ptr())); }
+
+    /// Return a type object from a handle or an object
+    static type of(handle h) { return type(type::handle_of(h), borrowed_t{}); }
+
+    // Defined in pybind11/cast.h
+    /// Convert C++ type to handle if previously registered. Does not convert
+    /// standard types, like int, float, etc. yet.
+    /// See https://github.com/pybind/pybind11/issues/2486
+    template<typename T>
+    static handle handle_of();
+
+    /// Convert C++ type to type if previously registered. Does not convert
+    /// standard types, like int, float, etc. yet.
+    /// See https://github.com/pybind/pybind11/issues/2486
+    template<typename T>
+    static type of() {return type(type::handle_of<T>(), borrowed_t{}); }
+};
+
 class iterable : public object {
 public:
     PYBIND11_OBJECT_DEFAULT(iterable, object, detail::PyIterable_Check)
@@ -908,7 +955,7 @@
         Return a string representation of the object. This is analogous to
         the ``str()`` function in Python.
     \endrst */
-    explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { }
+    explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { if (!m_ptr) throw error_already_set(); }
 
     operator std::string() const {
         object temp = *this;
@@ -948,7 +995,7 @@
     String literal version of `str`
  \endrst */
 inline str operator"" _s(const char *s, size_t size) { return {s, size}; }
-}
+} // namespace literals
 
 /// \addtogroup pytypes
 /// @{
@@ -980,6 +1027,9 @@
         return std::string(buffer, (size_t) length);
     }
 };
+// Note: breathe >= 4.17.0 will fail to build docs if the below two constructors
+// are included in the doxygen group; close here and reopen after as a workaround
+/// @} pytypes
 
 inline bytes::bytes(const pybind11::str &s) {
     object temp = s;
@@ -1009,19 +1059,19 @@
     m_ptr = obj.release().ptr();
 }
 
+/// \addtogroup pytypes
+/// @{
 class none : public object {
 public:
     PYBIND11_OBJECT(none, object, detail::PyNone_Check)
     none() : object(Py_None, borrowed_t{}) { }
 };
 
-#if PY_MAJOR_VERSION >= 3
 class ellipsis : public object {
 public:
     PYBIND11_OBJECT(ellipsis, object, detail::PyEllipsis_Check)
     ellipsis() : object(Py_Ellipsis, borrowed_t{}) { }
 };
-#endif
 
 class bool_ : public object {
 public:
@@ -1040,7 +1090,7 @@
     }
 };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 // Converts a value to the given unsigned type.  If an error occurs, you get back (Unsigned) -1;
 // otherwise you get back the unsigned long or unsigned long long value cast to (Unsigned).
 // (The distinction is critically important when casting a returned -1 error value to some other
@@ -1060,7 +1110,7 @@
         return v == (unsigned long long) -1 && PyErr_Occurred() ? (Unsigned) -1 : (Unsigned) v;
     }
 }
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 class int_ : public object {
 public:
@@ -1186,12 +1236,24 @@
     }
 
     template <typename T> operator T *() const {
+        return get_pointer<T>();
+    }
+
+    /// Get the pointer the capsule holds.
+    template<typename T = void>
+    T* get_pointer() const {
         auto name = this->name();
-        T * result = static_cast<T *>(PyCapsule_GetPointer(m_ptr, name));
+        T *result = static_cast<T *>(PyCapsule_GetPointer(m_ptr, name));
         if (!result) pybind11_fail("Unable to extract capsule contents!");
         return result;
     }
 
+    /// Replaces a capsule's pointer *without* calling the destructor on the existing one.
+    void set_pointer(const void *value) {
+        if (PyCapsule_SetPointer(m_ptr, const_cast<void *>(value)) != 0)
+            pybind11_fail("Could not set capsule pointer");
+    }
+
     const char *name() const { return PyCapsule_GetName(m_ptr); }
 };
 
@@ -1242,7 +1304,12 @@
 class sequence : public object {
 public:
     PYBIND11_OBJECT_DEFAULT(sequence, object, PySequence_Check)
-    size_t size() const { return (size_t) PySequence_Size(m_ptr); }
+    size_t size() const {
+        ssize_t result = PySequence_Size(m_ptr);
+        if (result == -1)
+            throw error_already_set();
+        return (size_t) result;
+    }
     bool empty() const { return size() == 0; }
     detail::sequence_accessor operator[](size_t index) const { return {*this, index}; }
     detail::item_accessor operator[](handle h) const { return object::operator[](h); }
@@ -1315,7 +1382,7 @@
     buffer_info request(bool writable = false) const {
         int flags = PyBUF_STRIDES | PyBUF_FORMAT;
         if (writable) flags |= PyBUF_WRITABLE;
-        Py_buffer *view = new Py_buffer();
+        auto *view = new Py_buffer();
         if (PyObject_GetBuffer(m_ptr, view, flags) != 0) {
             delete view;
             throw error_already_set();
@@ -1326,46 +1393,154 @@
 
 class memoryview : public object {
 public:
-    explicit memoryview(const buffer_info& info) {
-        static Py_buffer buf { };
-        // Py_buffer uses signed sizes, strides and shape!..
-        static std::vector<Py_ssize_t> py_strides { };
-        static std::vector<Py_ssize_t> py_shape { };
-        buf.buf = info.ptr;
-        buf.itemsize = info.itemsize;
-        buf.format = const_cast<char *>(info.format.c_str());
-        buf.ndim = (int) info.ndim;
-        buf.len = info.size;
-        py_strides.clear();
-        py_shape.clear();
-        for (size_t i = 0; i < (size_t) info.ndim; ++i) {
-            py_strides.push_back(info.strides[i]);
-            py_shape.push_back(info.shape[i]);
-        }
-        buf.strides = py_strides.data();
-        buf.shape = py_shape.data();
-        buf.suboffsets = nullptr;
-        buf.readonly = info.readonly;
-        buf.internal = nullptr;
+    PYBIND11_OBJECT_CVT(memoryview, object, PyMemoryView_Check, PyMemoryView_FromObject)
 
-        m_ptr = PyMemoryView_FromBuffer(&buf);
+    /** \rst
+        Creates ``memoryview`` from ``buffer_info``.
+
+        ``buffer_info`` must be created from ``buffer::request()``. Otherwise
+        throws an exception.
+
+        For creating a ``memoryview`` from objects that support buffer protocol,
+        use ``memoryview(const object& obj)`` instead of this constructor.
+     \endrst */
+    explicit memoryview(const buffer_info& info) {
+        if (!info.view())
+            pybind11_fail("Prohibited to create memoryview without Py_buffer");
+        // Note: PyMemoryView_FromBuffer never increments obj reference.
+        m_ptr = (info.view()->obj) ?
+            PyMemoryView_FromObject(info.view()->obj) :
+            PyMemoryView_FromBuffer(info.view());
         if (!m_ptr)
             pybind11_fail("Unable to create memoryview from buffer descriptor");
     }
 
-    PYBIND11_OBJECT_CVT(memoryview, object, PyMemoryView_Check, PyMemoryView_FromObject)
+    /** \rst
+        Creates ``memoryview`` from static buffer.
+
+        This method is meant for providing a ``memoryview`` for C/C++ buffer not
+        managed by Python. The caller is responsible for managing the lifetime
+        of ``ptr`` and ``format``, which MUST outlive the memoryview constructed
+        here.
+
+        See also: Python C API documentation for `PyMemoryView_FromBuffer`_.
+
+        .. _PyMemoryView_FromBuffer: https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromBuffer
+
+        :param ptr: Pointer to the buffer.
+        :param itemsize: Byte size of an element.
+        :param format: Pointer to the null-terminated format string. For
+            homogeneous Buffers, this should be set to
+            ``format_descriptor<T>::value``.
+        :param shape: Shape of the tensor (1 entry per dimension).
+        :param strides: Number of bytes between adjacent entries (one entry
+            per dimension).
+        :param readonly: Flag to indicate if the underlying storage may be
+            written to.
+     \endrst */
+    static memoryview from_buffer(
+        void *ptr, ssize_t itemsize, const char *format,
+        detail::any_container<ssize_t> shape,
+        detail::any_container<ssize_t> strides, bool readonly = false);
+
+    static memoryview from_buffer(
+        const void *ptr, ssize_t itemsize, const char *format,
+        detail::any_container<ssize_t> shape,
+        detail::any_container<ssize_t> strides) {
+        return memoryview::from_buffer(
+            const_cast<void*>(ptr), itemsize, format, shape, strides, true);
+    }
+
+    template<typename T>
+    static memoryview from_buffer(
+        T *ptr, detail::any_container<ssize_t> shape,
+        detail::any_container<ssize_t> strides, bool readonly = false) {
+        return memoryview::from_buffer(
+            reinterpret_cast<void*>(ptr), sizeof(T),
+            format_descriptor<T>::value, shape, strides, readonly);
+    }
+
+    template<typename T>
+    static memoryview from_buffer(
+        const T *ptr, detail::any_container<ssize_t> shape,
+        detail::any_container<ssize_t> strides) {
+        return memoryview::from_buffer(
+            const_cast<T*>(ptr), shape, strides, true);
+    }
+
+#if PY_MAJOR_VERSION >= 3
+    /** \rst
+        Creates ``memoryview`` from static memory.
+
+        This method is meant for providing a ``memoryview`` for C/C++ buffer not
+        managed by Python. The caller is responsible for managing the lifetime
+        of ``mem``, which MUST outlive the memoryview constructed here.
+
+        This method is not available in Python 2.
+
+        See also: Python C API documentation for `PyMemoryView_FromMemory`_.
+
+        .. _PyMemoryView_FromMemory: https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromMemory
+     \endrst */
+    static memoryview from_memory(void *mem, ssize_t size, bool readonly = false) {
+        PyObject* ptr = PyMemoryView_FromMemory(
+            reinterpret_cast<char*>(mem), size,
+            (readonly) ? PyBUF_READ : PyBUF_WRITE);
+        if (!ptr)
+            pybind11_fail("Could not allocate memoryview object!");
+        return memoryview(object(ptr, stolen_t{}));
+    }
+
+    static memoryview from_memory(const void *mem, ssize_t size) {
+        return memoryview::from_memory(const_cast<void*>(mem), size, true);
+    }
+#endif
 };
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+inline memoryview memoryview::from_buffer(
+    void *ptr, ssize_t itemsize, const char* format,
+    detail::any_container<ssize_t> shape,
+    detail::any_container<ssize_t> strides, bool readonly) {
+    size_t ndim = shape->size();
+    if (ndim != strides->size())
+        pybind11_fail("memoryview: shape length doesn't match strides length");
+    ssize_t size = ndim ? 1 : 0;
+    for (size_t i = 0; i < ndim; ++i)
+        size *= (*shape)[i];
+    Py_buffer view;
+    view.buf = ptr;
+    view.obj = nullptr;
+    view.len = size * itemsize;
+    view.readonly = static_cast<int>(readonly);
+    view.itemsize = itemsize;
+    view.format = const_cast<char*>(format);
+    view.ndim = static_cast<int>(ndim);
+    view.shape = shape->data();
+    view.strides = strides->data();
+    view.suboffsets = nullptr;
+    view.internal = nullptr;
+    PyObject* obj = PyMemoryView_FromBuffer(&view);
+    if (!obj)
+        throw error_already_set();
+    return memoryview(object(obj, stolen_t{}));
+}
+#endif  // DOXYGEN_SHOULD_SKIP_THIS
 /// @} pytypes
 
 /// \addtogroup python_builtins
 /// @{
+
+/// Get the length of a Python object.
 inline size_t len(handle h) {
     ssize_t result = PyObject_Length(h.ptr());
     if (result < 0)
-        pybind11_fail("Unable to compute length of object");
+        throw error_already_set();
     return (size_t) result;
 }
 
+/// Get the length hint of a Python object.
+/// Returns 0 when this cannot be determined.
 inline size_t len_hint(handle h) {
 #if PY_VERSION_HEX >= 0x03040000
     ssize_t result = PyObject_LengthHint(h.ptr(), 0);
@@ -1399,7 +1574,7 @@
 }
 /// @} python_builtins
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 template <typename D> iterator object_api<D>::begin() const { return iter(derived()); }
 template <typename D> iterator object_api<D>::end() const { return iterator::sentinel(); }
 template <typename D> item_accessor object_api<D>::operator[](handle key) const {
@@ -1428,7 +1603,7 @@
 str_attr_accessor object_api<D>::doc() const { return attr("__doc__"); }
 
 template <typename D>
-handle object_api<D>::get_type() const { return (PyObject *) Py_TYPE(derived().ptr()); }
+handle object_api<D>::get_type() const { return type::handle_of(derived()); }
 
 template <typename D>
 bool object_api<D>::rich_compare(object_api const &other, int value) const {
@@ -1480,5 +1655,5 @@
 #undef PYBIND11_MATH_OPERATOR_UNARY
 #undef PYBIND11_MATH_OPERATOR_BINARY
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h
index 32f8d29..721bb66 100644
--- a/include/pybind11/stl.h
+++ b/include/pybind11/stl.h
@@ -48,8 +48,8 @@
 #  define PYBIND11_HAS_VARIANT 1
 #endif
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /// Extracts an const lvalue reference or rvalue reference for U based on the type of T (e.g. for
 /// forwarding a container element).  Typically used indirect via forwarded_type(), below.
@@ -266,7 +266,9 @@
     static handle cast(T_ &&src, return_value_policy policy, handle parent) {
         if (!src)
             return none().inc_ref();
-        policy = return_value_policy_override<typename T::value_type>::policy(policy);
+        if (!std::is_lvalue_reference<T>::value) {
+            policy = return_value_policy_override<T>::policy(policy);
+        }
         return value_conv::cast(*std::forward<T_>(src), policy, parent);
     }
 
@@ -287,7 +289,7 @@
     PYBIND11_TYPE_CASTER(T, _("Optional[") + value_conv::name + _("]"));
 };
 
-#if PYBIND11_HAS_OPTIONAL
+#if defined(PYBIND11_HAS_OPTIONAL)
 template<typename T> struct type_caster<std::optional<T>>
     : public optional_caster<std::optional<T>> {};
 
@@ -295,7 +297,7 @@
     : public void_caster<std::nullopt_t> {};
 #endif
 
-#if PYBIND11_HAS_EXP_OPTIONAL
+#if defined(PYBIND11_HAS_EXP_OPTIONAL)
 template<typename T> struct type_caster<std::experimental::optional<T>>
     : public optional_caster<std::experimental::optional<T>> {};
 
@@ -367,19 +369,19 @@
     PYBIND11_TYPE_CASTER(Type, _("Union[") + detail::concat(make_caster<Ts>::name...) + _("]"));
 };
 
-#if PYBIND11_HAS_VARIANT
+#if defined(PYBIND11_HAS_VARIANT)
 template <typename... Ts>
 struct type_caster<std::variant<Ts...>> : variant_caster<std::variant<Ts...>> { };
 #endif
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 inline std::ostream &operator<<(std::ostream &os, const handle &obj) {
     os << (std::string) str(obj);
     return os;
 }
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
 
 #if defined(_MSC_VER)
 #pragma warning(pop)
diff --git a/include/pybind11/stl_bind.h b/include/pybind11/stl_bind.h
index da233ec..9d8ed0c 100644
--- a/include/pybind11/stl_bind.h
+++ b/include/pybind11/stl_bind.h
@@ -15,8 +15,8 @@
 #include <algorithm>
 #include <sstream>
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /* SFINAE helper class used by 'is_comparable */
 template <typename T>  struct container_traits {
@@ -223,7 +223,7 @@
             if (!slice.compute(v.size(), &start, &stop, &step, &slicelength))
                 throw error_already_set();
 
-            Vector *seq = new Vector();
+            auto *seq = new Vector();
             seq->reserve((size_t) slicelength);
 
             for (size_t i=0; i<slicelength; ++i) {
@@ -397,14 +397,19 @@
         if (!detail::compare_buffer_info<T>::compare(info) || (ssize_t) sizeof(T) != info.itemsize)
             throw type_error("Format mismatch (Python: " + info.format + " C++: " + format_descriptor<T>::format() + ")");
 
-        auto vec = std::unique_ptr<Vector>(new Vector());
-        vec->reserve((size_t) info.shape[0]);
         T *p = static_cast<T*>(info.ptr);
         ssize_t step = info.strides[0] / static_cast<ssize_t>(sizeof(T));
         T *end = p + info.shape[0] * step;
-        for (; p != end; p += step)
-            vec->push_back(*p);
-        return vec.release();
+        if (step == 1) {
+            return Vector(p, end);
+        }
+        else {
+            Vector vec;
+            vec.reserve((size_t) info.shape[0]);
+            for (; p != end; p += step)
+                vec.push_back(*p);
+            return vec;
+        }
     }));
 
     return;
@@ -413,7 +418,7 @@
 template <typename Vector, typename Class_, typename... Args>
 enable_if_t<!detail::any_of<std::is_same<Args, buffer_protocol>...>::value> vector_buffer(Class_&) {}
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 //
 // std::vector
@@ -511,7 +516,7 @@
 // std::map, std::unordered_map
 //
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /* Fallback functions */
 template <typename, typename, typename... Args> void map_if_insertion_operator(const Args &...) { }
@@ -577,7 +582,7 @@
 }
 
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 template <typename Map, typename holder_type = std::unique_ptr<Map>, typename... Args>
 class_<Map, holder_type> bind_map(handle scope, const std::string &name, Args&&... args) {
@@ -653,4 +658,4 @@
     return cl;
 }
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/pybind11/__init__.py b/pybind11/__init__.py
index 4b1de3e..ad65420 100644
--- a/pybind11/__init__.py
+++ b/pybind11/__init__.py
@@ -1,12 +1,12 @@
-from ._version import version_info, __version__  # noqa: F401 imported but unused
+# -*- coding: utf-8 -*-
+
+from ._version import version_info, __version__
+from .commands import get_include, get_cmake_dir
 
 
-def get_include(user=False):
-    import os
-    d = os.path.dirname(__file__)
-    if os.path.exists(os.path.join(d, "include")):
-        # Package is installed
-        return os.path.join(d, "include")
-    else:
-        # Package is from a source directory
-        return os.path.join(os.path.dirname(d), "include")
+__all__ = (
+    "version_info",
+    "__version__",
+    "get_include",
+    "get_cmake_dir",
+)
diff --git a/pybind11/__main__.py b/pybind11/__main__.py
index 89b263a..020988c 100644
--- a/pybind11/__main__.py
+++ b/pybind11/__main__.py
@@ -1,36 +1,52 @@
+# -*- coding: utf-8 -*-
 from __future__ import print_function
 
 import argparse
 import sys
 import sysconfig
 
-from . import get_include
+from .commands import get_include, get_cmake_dir
 
 
 def print_includes():
-    dirs = [sysconfig.get_path('include'),
-            sysconfig.get_path('platinclude'),
-            get_include()]
+    # type: () -> None
+    dirs = [
+        sysconfig.get_path("include"),
+        sysconfig.get_path("platinclude"),
+        get_include(),
+    ]
 
     # Make unique but preserve order
     unique_dirs = []
     for d in dirs:
-        if d not in unique_dirs:
+        if d and d not in unique_dirs:
             unique_dirs.append(d)
 
-    print(' '.join('-I' + d for d in unique_dirs))
+    print(" ".join("-I" + d for d in unique_dirs))
 
 
 def main():
-    parser = argparse.ArgumentParser(prog='python -m pybind11')
-    parser.add_argument('--includes', action='store_true',
-                        help='Include flags for both pybind11 and Python headers.')
+    # type: () -> None
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--includes",
+        action="store_true",
+        help="Include flags for both pybind11 and Python headers.",
+    )
+    parser.add_argument(
+        "--cmakedir",
+        action="store_true",
+        help="Print the CMake module directory, ideal for setting -Dpybind11_ROOT in CMake.",
+    )
     args = parser.parse_args()
     if not sys.argv[1:]:
         parser.print_help()
     if args.includes:
         print_includes()
+    if args.cmakedir:
+        print(get_cmake_dir())
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/pybind11/_version.py b/pybind11/_version.py
index 8d5aa5c..d18535c 100644
--- a/pybind11/_version.py
+++ b/pybind11/_version.py
@@ -1,2 +1,12 @@
-version_info = (2, 5, 0)
-__version__ = '.'.join(map(str, version_info))
+# -*- coding: utf-8 -*-
+
+
+def _to_int(s):
+    try:
+        return int(s)
+    except ValueError:
+        return s
+
+
+__version__ = "2.6.1"
+version_info = tuple(_to_int(s) for s in __version__.split("."))
diff --git a/pybind11/_version.pyi b/pybind11/_version.pyi
new file mode 100644
index 0000000..970184c
--- /dev/null
+++ b/pybind11/_version.pyi
@@ -0,0 +1,6 @@
+from typing import Union, Tuple
+
+def _to_int(s: str) -> Union[int, str]: ...
+
+__version__: str
+version_info: Tuple[Union[int, str], ...]
diff --git a/pybind11/commands.py b/pybind11/commands.py
new file mode 100644
index 0000000..34dbaf8
--- /dev/null
+++ b/pybind11/commands.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+import os
+
+
+DIR = os.path.abspath(os.path.dirname(__file__))
+
+
+def get_include(user=False):
+    # type: (bool) -> str
+    installed_path = os.path.join(DIR, "include")
+    source_path = os.path.join(os.path.dirname(DIR), "include")
+    return installed_path if os.path.exists(installed_path) else source_path
+
+
+def get_cmake_dir():
+    # type: () -> str
+    cmake_installed_path = os.path.join(DIR, "share", "cmake", "pybind11")
+    if os.path.exists(cmake_installed_path):
+        return cmake_installed_path
+    else:
+        msg = "pybind11 not installed, installation required to access the CMake files"
+        raise ImportError(msg)
diff --git a/pybind11/py.typed b/pybind11/py.typed
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/pybind11/py.typed
diff --git a/pybind11/setup_helpers.py b/pybind11/setup_helpers.py
new file mode 100644
index 0000000..33605dd
--- /dev/null
+++ b/pybind11/setup_helpers.py
@@ -0,0 +1,436 @@
+# -*- coding: utf-8 -*-
+
+"""
+This module provides helpers for C++11+ projects using pybind11.
+
+LICENSE:
+
+Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+# IMPORTANT: If you change this file in the pybind11 repo, also review
+# setup_helpers.pyi for matching changes.
+#
+# If you copy this file in, you don't
+# need the .pyi file; it's just an interface file for static type checkers.
+
+import contextlib
+import os
+import shutil
+import sys
+import tempfile
+import threading
+import platform
+import warnings
+
+try:
+    from setuptools.command.build_ext import build_ext as _build_ext
+    from setuptools import Extension as _Extension
+except ImportError:
+    from distutils.command.build_ext import build_ext as _build_ext
+    from distutils.extension import Extension as _Extension
+
+import distutils.errors
+import distutils.ccompiler
+
+
+WIN = sys.platform.startswith("win32")
+PY2 = sys.version_info[0] < 3
+MACOS = sys.platform.startswith("darwin")
+STD_TMPL = "/std:c++{}" if WIN else "-std=c++{}"
+
+
+# It is recommended to use PEP 518 builds if using this module. However, this
+# file explicitly supports being copied into a user's project directory
+# standalone, and pulling pybind11 with the deprecated setup_requires feature.
+# If you copy the file, remember to add it to your MANIFEST.in, and add the current
+# directory into your path if it sits beside your setup.py.
+
+
+class Pybind11Extension(_Extension):
+    """
+    Build a C++11+ Extension module with pybind11. This automatically adds the
+    recommended flags when you init the extension and assumes C++ sources - you
+    can further modify the options yourself.
+
+    The customizations are:
+
+    * ``/EHsc`` and ``/bigobj`` on Windows
+    * ``stdlib=libc++`` on macOS
+    * ``visibility=hidden`` and ``-g0`` on Unix
+
+    Finally, you can set ``cxx_std`` via constructor or afterwards to enable
+    flags for C++ std, and a few extra helper flags related to the C++ standard
+    level. It is _highly_ recommended you either set this, or use the provided
+    ``build_ext``, which will search for the highest supported C++ standard
+    level for you if the ``cxx_std`` property is not set. Do not set the ``cxx_std``
+    property more than once, as flags are added when you set it. Set the
+    property to None to disable the addition of C++ standard flags.
+
+    If you want to add pybind11 headers manually, for example for an exact
+    git checkout, then set ``include_pybind11=False``.
+
+    Warning: do not use property-based access to the instance on Python 2 -
+    this is an ugly old-style class due to Distutils.
+    """
+
+    def _add_cflags(self, *flags):
+        for flag in flags:
+            if flag not in self.extra_compile_args:
+                self.extra_compile_args.append(flag)
+
+    def _add_lflags(self, *flags):
+        for flag in flags:
+            if flag not in self.extra_link_args:
+                self.extra_link_args.append(flag)
+
+    def __init__(self, *args, **kwargs):
+
+        self._cxx_level = 0
+        cxx_std = kwargs.pop("cxx_std", 0)
+
+        if "language" not in kwargs:
+            kwargs["language"] = "c++"
+
+        include_pybind11 = kwargs.pop("include_pybind11", True)
+
+        # Can't use super here because distutils has old-style classes in
+        # Python 2!
+        _Extension.__init__(self, *args, **kwargs)
+
+        # Include the installed package pybind11 headers
+        if include_pybind11:
+            # If using setup_requires, this fails the first time - that's okay
+            try:
+                import pybind11
+
+                pyinc = pybind11.get_include()
+
+                if pyinc not in self.include_dirs:
+                    self.include_dirs.append(pyinc)
+            except ImportError:
+                pass
+
+        # Have to use the accessor manually to support Python 2 distutils
+        Pybind11Extension.cxx_std.__set__(self, cxx_std)
+
+        if WIN:
+            self._add_cflags("/EHsc", "/bigobj")
+        else:
+            self._add_cflags("-fvisibility=hidden", "-g0")
+            if MACOS:
+                self._add_cflags("-stdlib=libc++")
+                self._add_lflags("-stdlib=libc++")
+
+    @property
+    def cxx_std(self):
+        """
+        The CXX standard level. If set, will add the required flags. If left
+        at 0, it will trigger an automatic search when pybind11's build_ext
+        is used. If None, will have no effect.  Besides just the flags, this
+        may add a register warning/error fix for Python 2 or macos-min 10.9
+        or 10.14.
+        """
+        return self._cxx_level
+
+    @cxx_std.setter
+    def cxx_std(self, level):
+
+        if self._cxx_level:
+            warnings.warn("You cannot safely change the cxx_level after setting it!")
+
+        # MSVC 2015 Update 3 and later only have 14 (and later 17) modes, so
+        # force a valid flag here.
+        if WIN and level == 11:
+            level = 14
+
+        self._cxx_level = level
+
+        if not level:
+            return
+
+        self.extra_compile_args.append(STD_TMPL.format(level))
+
+        if MACOS and "MACOSX_DEPLOYMENT_TARGET" not in os.environ:
+            # C++17 requires a higher min version of macOS. An earlier version
+            # (10.12 or 10.13) can be set manually via environment variable if
+            # you are careful in your feature usage, but 10.14 is the safest
+            # setting for general use. However, never set higher than the
+            # current macOS version!
+            current_macos = tuple(int(x) for x in platform.mac_ver()[0].split(".")[:2])
+            desired_macos = (10, 9) if level < 17 else (10, 14)
+            macos_string = ".".join(str(x) for x in min(current_macos, desired_macos))
+            macosx_min = "-mmacosx-version-min=" + macos_string
+            self.extra_compile_args.append(macosx_min)
+            self.extra_link_args.append(macosx_min)
+
+        if PY2:
+            if WIN:
+                # Will be ignored on MSVC 2015, where C++17 is not supported so
+                # this flag is not valid.
+                self.extra_compile_args.append("/wd5033")
+            elif level >= 17:
+                self.extra_compile_args.append("-Wno-register")
+            elif level >= 14:
+                self.extra_compile_args.append("-Wno-deprecated-register")
+
+
+# Just in case someone clever tries to multithread
+tmp_chdir_lock = threading.Lock()
+cpp_cache_lock = threading.Lock()
+
+
+@contextlib.contextmanager
+def tmp_chdir():
+    "Prepare and enter a temporary directory, cleanup when done"
+
+    # Threadsafe: the lock is held while the working directory is changed.
+    with tmp_chdir_lock:
+        olddir = os.getcwd()
+        tmpdir = tempfile.mkdtemp()  # outside try: a failure here leaves nothing to undo
+        try:
+            os.chdir(tmpdir)
+            yield tmpdir
+        finally:
+            os.chdir(olddir)
+            shutil.rmtree(tmpdir)
+
+
+# cf http://bugs.python.org/issue26689
+def has_flag(compiler, flag):
+    """
+    Return True if the specified compiler accepts the given flag, and
+    False otherwise. This is tested by compiling a trivial C++ source
+    file with the flag inside a temporary directory.
+    """
+
+    with tmp_chdir():
+        fname = "flagcheck.cpp"
+        with open(fname, "w") as f:
+            f.write("int main (int argc, char **argv) { return 0; }")
+
+        try:
+            compiler.compile([fname], extra_postargs=[flag])
+        except distutils.errors.CompileError:
+            return False
+        return True
+
+
+# Every call will cache the result
+cpp_flag_cache = None
+
+
+def auto_cpp_level(compiler):
+    """
+    Return the max supported C++ std level (17, 14, or 11). Returns latest on Windows.
+    """
+
+    if WIN:
+        return "latest"
+
+    global cpp_flag_cache
+
+    # If this has been previously calculated with the same args, return that
+    with cpp_cache_lock:
+        if cpp_flag_cache:
+            return cpp_flag_cache
+
+    levels = [17, 14, 11]
+
+    for level in levels:
+        if has_flag(compiler, STD_TMPL.format(level)):
+            with cpp_cache_lock:
+                cpp_flag_cache = level
+            return level
+
+    msg = "Unsupported compiler -- at least C++11 support is needed!"
+    raise RuntimeError(msg)
+
+
+class build_ext(_build_ext):  # noqa: N801
+    """
+    Customized build_ext that allows an auto-search for the highest supported
+    C++ level for Pybind11Extension. This is only needed for the auto-search
+    for now, and is completely optional otherwise.
+    """
+
+    def build_extensions(self):
+        """
+        Build extensions, injecting C++ std for Pybind11Extension if needed.
+        """
+
+        for ext in self.extensions:
+            if hasattr(ext, "_cxx_level") and ext._cxx_level == 0:
+                # Python 2 syntax - old-style distutils class
+                ext.__class__.cxx_std.__set__(ext, auto_cpp_level(self.compiler))
+
+        # Python 2 doesn't allow super here, since distutils uses old-style
+        # classes!
+        _build_ext.build_extensions(self)
+
+
+def naive_recompile(obj, src):
+    """
+    This will recompile only if the source file changes. It does not check
+    header files, so a more advanced function or Ccache is better if you have
+    editable header files in your package.
+    """
+    return os.stat(obj).st_mtime < os.stat(src).st_mtime
+
+
+def no_recompile(obg, src):
+    """
+    This is the safest but slowest choice (and is the default) - will always
+    recompile sources.
+    """
+    return True
+
+
+# Optional parallel compile utility
+# inspired by: http://stackoverflow.com/questions/11013851/speeding-up-build-process-with-distutils
+# and: https://github.com/tbenthompson/cppimport/blob/stable/cppimport/build_module.py
+# and NumPy's parallel distutils module:
+#              https://github.com/numpy/numpy/blob/master/numpy/distutils/ccompiler.py
+class ParallelCompile(object):
+    """
+    Make a parallel compile function. Inspired by
+    numpy.distutils.ccompiler.CCompiler_compile and cppimport.
+
+    This takes several arguments that allow you to customize the compile
+    function created:
+
+    envvar:
+        Set an environment variable to control the compilation threads, like
+        NPY_NUM_BUILD_JOBS
+    default:
+        0 will automatically multithread, or 1 will only multithread if the
+        envvar is set.
+    max:
+        The limit for automatic multithreading if non-zero
+    needs_recompile:
+        A function of (obj, src) that returns True when recompile is needed.  No
+        effect in isolated mode; use ccache instead, see
+        https://github.com/matplotlib/matplotlib/issues/1507/
+
+    To use::
+
+        ParallelCompile("NPY_NUM_BUILD_JOBS").install()
+
+    or::
+
+        with ParallelCompile("NPY_NUM_BUILD_JOBS"):
+            setup(...)
+
+    By default, this assumes all files need to be recompiled. A smarter
+    function can be provided via needs_recompile.  If the output has not yet
+    been generated, the compile will always run, and this function is not
+    called.
+    """
+
+    __slots__ = ("envvar", "default", "max", "_old", "needs_recompile")
+
+    def __init__(self, envvar=None, default=0, max=0, needs_recompile=no_recompile):
+        self.envvar = envvar
+        self.default = default
+        self.max = max
+        self.needs_recompile = needs_recompile
+        self._old = []
+
+    def function(self):
+        """
+        Builds a function object usable as distutils.ccompiler.CCompiler.compile.
+        """
+
+        def compile_function(
+            compiler,
+            sources,
+            output_dir=None,
+            macros=None,
+            include_dirs=None,
+            debug=0,
+            extra_preargs=None,
+            extra_postargs=None,
+            depends=None,
+        ):
+
+            # These lines are directly from distutils.ccompiler.CCompiler
+            macros, objects, extra_postargs, pp_opts, build = compiler._setup_compile(
+                output_dir, macros, include_dirs, sources, depends, extra_postargs
+            )
+            cc_args = compiler._get_cc_args(pp_opts, debug, extra_preargs)
+
+            # The number of threads; start with default.
+            threads = self.default
+
+            # If an envvar name was given, its value (when set) overrides the default.
+            if self.envvar is not None:
+                threads = int(os.environ.get(self.envvar, self.default))
+
+            def _single_compile(obj):
+                try:
+                    src, ext = build[obj]
+                except KeyError:
+                    return
+
+                if not os.path.exists(obj) or self.needs_recompile(obj, src):
+                    compiler._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
+
+            try:
+                import multiprocessing
+                from multiprocessing.pool import ThreadPool
+            except ImportError:
+                threads = 1
+
+            if threads == 0:
+                try:
+                    threads = multiprocessing.cpu_count()
+                    threads = self.max if self.max and self.max < threads else threads
+                except NotImplementedError:
+                    threads = 1
+
+            if threads > 1:
+                for _ in ThreadPool(threads).imap_unordered(_single_compile, objects):
+                    pass
+            else:
+                for ob in objects:
+                    _single_compile(ob)
+
+            return objects
+
+        return compile_function
+
+    def install(self):
+        distutils.ccompiler.CCompiler.compile = self.function()
+        return self
+
+    def __enter__(self):
+        self._old.append(distutils.ccompiler.CCompiler.compile)
+        return self.install()
+
+    def __exit__(self, *args):
+        distutils.ccompiler.CCompiler.compile = self._old.pop()
diff --git a/pybind11/setup_helpers.pyi b/pybind11/setup_helpers.pyi
new file mode 100644
index 0000000..23232e1
--- /dev/null
+++ b/pybind11/setup_helpers.pyi
@@ -0,0 +1,61 @@
+# IMPORTANT: Should stay in sync with setup_helpers.py (mostly checked by CI /
+# pre-commit).
+
+from typing import Any, Callable, Iterator, Optional, Type, TypeVar, Union
+from types import TracebackType
+
+from distutils.command.build_ext import build_ext as _build_ext  # type: ignore
+from distutils.extension import Extension as _Extension
+import distutils.ccompiler
+import contextlib
+
+WIN: bool
+PY2: bool
+MACOS: bool
+STD_TMPL: str
+
+class Pybind11Extension(_Extension):
+    def _add_cflags(self, *flags: str) -> None: ...
+    def _add_lflags(self, *flags: str) -> None: ...
+    def __init__(
+        self, *args: Any, cxx_std: int = 0, language: str = "c++", **kwargs: Any
+    ) -> None: ...
+    @property
+    def cxx_std(self) -> int: ...
+    @cxx_std.setter
+    def cxx_std(self, level: int) -> None: ...
+
+@contextlib.contextmanager
+def tmp_chdir() -> Iterator[str]: ...
+def has_flag(compiler: distutils.ccompiler.CCompiler, flag: str) -> bool: ...
+def auto_cpp_level(compiler: distutils.ccompiler.CCompiler) -> Union[int, str]: ...
+
+class build_ext(_build_ext):  # type: ignore
+    def build_extensions(self) -> None: ...
+
+def no_recompile(obj: str, src: str) -> bool: ...
+def naive_recompile(obj: str, src: str) -> bool: ...
+
+T = TypeVar("T", bound="ParallelCompile")
+
+class ParallelCompile:
+    envvar: Optional[str]
+    default: int
+    max: int
+    needs_recompile: Callable[[str, str], bool]
+    def __init__(
+        self,
+        envvar: Optional[str] = None,
+        default: int = 0,
+        max: int = 0,
+        needs_recompile: Callable[[str, str], bool] = no_recompile,
+    ) -> None: ...
+    def function(self) -> Any: ...
+    def install(self: T) -> T: ...
+    def __enter__(self: T) -> T: ...
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        traceback: Optional[TracebackType],
+    ) -> None: ...
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..5c9d153
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools>=42", "wheel", "cmake>=3.18", "ninja"]
+build-backend = "setuptools.build_meta"
diff --git a/setup.cfg b/setup.cfg
index 002f38d..e7fc8f4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,59 @@
+[metadata]
+long_description = file: README.rst
+long_description_content_type = text/x-rst
+description = Seamless operability between C++11 and Python
+author = Wenzel Jakob
+author_email = wenzel.jakob@epfl.ch
+url = https://github.com/pybind/pybind11
+license = BSD
+
+classifiers =
+    Development Status :: 5 - Production/Stable
+    Intended Audience :: Developers
+    Topic :: Software Development :: Libraries :: Python Modules
+    Topic :: Utilities
+    Programming Language :: C++
+    Programming Language :: Python :: 2.7
+    Programming Language :: Python :: 3
+    Programming Language :: Python :: 3.5
+    Programming Language :: Python :: 3.6
+    Programming Language :: Python :: 3.7
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    License :: OSI Approved :: BSD License
+    Programming Language :: Python :: Implementation :: PyPy
+    Programming Language :: Python :: Implementation :: CPython
+    Programming Language :: C++
+    Topic :: Software Development :: Libraries :: Python Modules
+
+keywords =
+    C++11
+    Python bindings
+
+[options]
+python_requires = >=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4
+zip_safe = False
+
 [bdist_wheel]
 universal=1
 
+[check-manifest]
+ignore =
+    tests/**
+    docs/**
+    tools/**
+    include/**
+    .appveyor.yml
+    .cmake-format.yaml
+    .gitmodules
+    .pre-commit-config.yaml
+    .readthedocs.yml
+    .clang-tidy
+    pybind11/include/**
+    pybind11/share/**
+    CMakeLists.txt
+
+
 [flake8]
 max-line-length = 99
 show_source = True
@@ -10,3 +63,9 @@
     E201, E241, W504,
     # camelcase 'cPickle' imported as lowercase 'pickle'
     N813
+    # Black conflict
+    W503, E203
+
+[mypy]
+files = pybind11
+strict = True
diff --git a/setup.py b/setup.py
index 473ea1e..3a03279 100644
--- a/setup.py
+++ b/setup.py
@@ -1,122 +1,115 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # Setup script for PyPI; use CMakeFile.txt to build extension modules
 
-from setuptools import setup
-from distutils.command.install_headers import install_headers
-from distutils.command.build_py import build_py
-from pybind11 import __version__
+import contextlib
 import os
+import re
+import shutil
+import string
+import subprocess
+import sys
+import tempfile
 
-package_data = [
-    'include/pybind11/detail/class.h',
-    'include/pybind11/detail/common.h',
-    'include/pybind11/detail/descr.h',
-    'include/pybind11/detail/init.h',
-    'include/pybind11/detail/internals.h',
-    'include/pybind11/detail/typeid.h',
-    'include/pybind11/attr.h',
-    'include/pybind11/buffer_info.h',
-    'include/pybind11/cast.h',
-    'include/pybind11/chrono.h',
-    'include/pybind11/common.h',
-    'include/pybind11/complex.h',
-    'include/pybind11/eigen.h',
-    'include/pybind11/embed.h',
-    'include/pybind11/eval.h',
-    'include/pybind11/functional.h',
-    'include/pybind11/iostream.h',
-    'include/pybind11/numpy.h',
-    'include/pybind11/operators.h',
-    'include/pybind11/options.h',
-    'include/pybind11/pybind11.h',
-    'include/pybind11/pytypes.h',
-    'include/pybind11/stl.h',
-    'include/pybind11/stl_bind.h',
-]
+import setuptools.command.sdist
 
-# Prevent installation of pybind11 headers by setting
-# PYBIND11_USE_CMAKE.
-if os.environ.get('PYBIND11_USE_CMAKE'):
-    headers = []
-else:
-    headers = package_data
+DIR = os.path.abspath(os.path.dirname(__file__))
+VERSION_REGEX = re.compile(
+    r"^\s*#\s*define\s+PYBIND11_VERSION_([A-Z]+)\s+(.*)$", re.MULTILINE
+)
+
+# PYBIND11_GLOBAL_SDIST will build a different sdist, with the python-headers
+# files, and the sys.prefix files (CMake and headers).
+
+global_sdist = os.environ.get("PYBIND11_GLOBAL_SDIST", False)
+
+setup_py = "tools/setup_global.py.in" if global_sdist else "tools/setup_main.py.in"
+extra_cmd = 'cmdclass["sdist"] = SDist\n'
+
+to_src = (
+    ("pyproject.toml", "tools/pyproject.toml"),
+    ("setup.py", setup_py),
+)
+
+# Read the listed version
+with open("pybind11/_version.py") as f:
+    code = compile(f.read(), "pybind11/_version.py", "exec")
+loc = {}
+exec(code, loc)
+version = loc["__version__"]
+
+# Verify that the version matches the one in C++
+with open("include/pybind11/detail/common.h") as f:
+    matches = dict(VERSION_REGEX.findall(f.read()))
+cpp_version = "{MAJOR}.{MINOR}.{PATCH}".format(**matches)
+if version != cpp_version:
+    msg = "Python version {} does not match C++ version {}!".format(
+        version, cpp_version
+    )
+    raise RuntimeError(msg)
 
 
-class InstallHeaders(install_headers):
-    """Use custom header installer because the default one flattens subdirectories"""
-    def run(self):
-        if not self.distribution.headers:
-            return
-
-        for header in self.distribution.headers:
-            subdir = os.path.dirname(os.path.relpath(header, 'include/pybind11'))
-            install_dir = os.path.join(self.install_dir, subdir)
-            self.mkpath(install_dir)
-
-            (out, _) = self.copy_file(header, install_dir)
-            self.outfiles.append(out)
+def get_and_replace(filename, binary=False, **opts):
+    with open(filename, "rb" if binary else "r") as f:
+        contents = f.read()
+    # Replacement has to be done on text in Python 3 (both work in Python 2)
+    if binary:
+        return string.Template(contents.decode()).substitute(opts).encode()
+    else:
+        return string.Template(contents).substitute(opts)
 
 
-# Install the headers inside the package as well
-class BuildPy(build_py):
-    def build_package_data(self):
-        build_py.build_package_data(self)
-        for header in package_data:
-            target = os.path.join(self.build_lib, 'pybind11', header)
-            self.mkpath(os.path.dirname(target))
-            self.copy_file(header, target, preserve_mode=False)
+# Use our input files instead when making the SDist (and anything that depends
+# on it, like a wheel)
+class SDist(setuptools.command.sdist.sdist):
+    def make_release_tree(self, base_dir, files):
+        setuptools.command.sdist.sdist.make_release_tree(self, base_dir, files)
+
+        for to, src in to_src:
+            txt = get_and_replace(src, binary=True, version=version, extra_cmd="")
+
+            dest = os.path.join(base_dir, to)
+
+            # This is normally linked, so unlink before writing!
+            os.unlink(dest)
+            with open(dest, "wb") as f:
+                f.write(txt)
 
 
-setup(
-    name='pybind11',
-    version=__version__,
-    description='Seamless operability between C++11 and Python',
-    author='Wenzel Jakob',
-    author_email='wenzel.jakob@epfl.ch',
-    url='https://github.com/pybind/pybind11',
-    download_url='https://github.com/pybind/pybind11/tarball/v' + __version__,
-    packages=['pybind11'],
-    license='BSD',
-    headers=headers,
-    zip_safe=False,
-    cmdclass=dict(install_headers=InstallHeaders, build_py=BuildPy),
-    classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        'Intended Audience :: Developers',
-        'Topic :: Software Development :: Libraries :: Python Modules',
-        'Topic :: Utilities',
-        'Programming Language :: C++',
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.2',
-        'Programming Language :: Python :: 3.3',
-        'Programming Language :: Python :: 3.4',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: 3.6',
-        'License :: OSI Approved :: BSD License'
-    ],
-    keywords='C++11, Python bindings',
-    long_description="""pybind11 is a lightweight header-only library that
-exposes C++ types in Python and vice versa, mainly to create Python bindings of
-existing C++ code. Its goals and syntax are similar to the excellent
-Boost.Python by David Abrahams: to minimize boilerplate code in traditional
-extension modules by inferring type information using compile-time
-introspection.
+# Backport from Python 3; mkdtemp() runs before `try` so its own failure is not masked
+@contextlib.contextmanager
+def TemporaryDirectory():  # noqa: N802
+    "Prepare a temporary directory, cleanup when done"
+    tmpdir = tempfile.mkdtemp()
+    try:
+        yield tmpdir
+    finally:
+        shutil.rmtree(tmpdir)
 
-The main issue with Boost.Python-and the reason for creating such a similar
-project-is Boost. Boost is an enormously large and complex suite of utility
-libraries that works with almost every C++ compiler in existence. This
-compatibility has its cost: arcane template tricks and workarounds are
-necessary to support the oldest and buggiest of compiler specimens. Now that
-C++11-compatible compilers are widely available, this heavy machinery has
-become an excessively large and unnecessary dependency.
 
-Think of this library as a tiny self-contained version of Boost.Python with
-everything stripped away that isn't relevant for binding generation. Without
-comments, the core header files only require ~4K lines of code and depend on
-Python (2.7 or 3.x, or PyPy2.7 >= 5.7) and the C++ standard library. This
-compact implementation was possible thanks to some of the new C++11 language
-features (specifically: tuples, lambda functions and variadic templates). Since
-its creation, this library has grown beyond Boost.Python in many ways, leading
-to dramatically simpler binding code in many common situations.""")
+# Remove the CMake install directory when done
+@contextlib.contextmanager
+def remove_output(*sources):
+    try:
+        yield
+    finally:
+        for src in sources:
+            shutil.rmtree(src)
+
+
+with remove_output("pybind11/include", "pybind11/share"):
+    # Generate the files if they are not present.
+    with TemporaryDirectory() as tmpdir:
+        cmd = ["cmake", "-S", ".", "-B", tmpdir] + [
+            "-DCMAKE_INSTALL_PREFIX=pybind11",
+            "-DBUILD_TESTING=OFF",
+            "-DPYBIND11_NOPYTHON=ON",
+        ]
+        cmake_opts = dict(cwd=DIR, stdout=sys.stdout, stderr=sys.stderr)
+        subprocess.check_call(cmd, **cmake_opts)
+        subprocess.check_call(["cmake", "--install", tmpdir], **cmake_opts)
+
+    txt = get_and_replace(setup_py, version=version, extra_cmd=extra_cmd)
+    code = compile(txt, setup_py, "exec")
+    exec(code, {"SDist": SDist})
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 765c47a..dae8b5a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -5,78 +5,150 @@
 # All rights reserved. Use of this source code is governed by a
 # BSD-style license that can be found in the LICENSE file.
 
-cmake_minimum_required(VERSION 2.8.12)
+cmake_minimum_required(VERSION 3.4)
 
-option(PYBIND11_WERROR  "Report all warnings as errors"  OFF)
+# The `cmake_minimum_required(VERSION 3.4...3.18)` syntax does not work with
+# some versions of VS that have a patched CMake 3.11. This forces us to emulate
+# the behavior using the following workaround:
+if(${CMAKE_VERSION} VERSION_LESS 3.18)
+  cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION})
+else()
+  cmake_policy(VERSION 3.18)
+endif()
 
-if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
-    # We're being loaded directly, i.e. not via add_subdirectory, so make this
-    # work as its own project and load the pybind11Config to get the tools we need
-    project(pybind11_tests CXX)
+# Only needed for CMake < 3.5 support
+include(CMakeParseArguments)
 
-    find_package(pybind11 REQUIRED CONFIG)
+# Filter out items; print an optional message if any items filtered
+#
+# Usage:
+#   pybind11_filter_tests(LISTNAME file1.cpp file2.cpp ... MESSAGE "")
+#
+macro(PYBIND11_FILTER_TESTS LISTNAME)
+  cmake_parse_arguments(ARG "" "MESSAGE" "" ${ARGN})
+  set(PYBIND11_FILTER_TESTS_FOUND OFF)
+  foreach(filename IN LISTS ARG_UNPARSED_ARGUMENTS)
+    list(FIND ${LISTNAME} ${filename} _FILE_FOUND)
+    if(_FILE_FOUND GREATER -1)
+      list(REMOVE_AT ${LISTNAME} ${_FILE_FOUND})
+      set(PYBIND11_FILTER_TESTS_FOUND ON)
+    endif()
+  endforeach()
+  if(PYBIND11_FILTER_TESTS_FOUND AND ARG_MESSAGE)
+    message(STATUS "${ARG_MESSAGE}")
+  endif()
+endmacro()
+
+# New Python support
+if(DEFINED Python_EXECUTABLE)
+  set(PYTHON_EXECUTABLE "${Python_EXECUTABLE}")
+  set(PYTHON_VERSION "${Python_VERSION}")
+endif()
+
+# There's no harm in including a project in a project
+project(pybind11_tests CXX)
+
+# Access FindCatch and more
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/../tools")
+
+option(PYBIND11_WERROR "Report all warnings as errors" OFF)
+option(DOWNLOAD_EIGEN "Download EIGEN (requires CMake 3.11+)" OFF)
+option(PYBIND11_CUDA_TESTS "Enable building CUDA tests (requires CMake 3.12+)" OFF)
+set(PYBIND11_TEST_OVERRIDE
+    ""
+    CACHE STRING "Tests from ;-separated list of *.cpp files will be built instead of all tests")
+set(PYBIND11_TEST_FILTER
+    ""
+    CACHE STRING "Tests from ;-separated list of *.cpp files will be removed from all tests")
+
+if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
+  # We're being loaded directly, i.e. not via add_subdirectory, so make this
+  # work as its own project and load the pybind11Config to get the tools we need
+  find_package(pybind11 REQUIRED CONFIG)
 endif()
 
 if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
   message(STATUS "Setting tests build type to MinSizeRel as none was specified")
-  set(CMAKE_BUILD_TYPE MinSizeRel CACHE STRING "Choose the type of build." FORCE)
-  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release"
-    "MinSizeRel" "RelWithDebInfo")
+  set(CMAKE_BUILD_TYPE
+      MinSizeRel
+      CACHE STRING "Choose the type of build." FORCE)
+  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel"
+                                               "RelWithDebInfo")
+endif()
+
+if(PYBIND11_CUDA_TESTS)
+  enable_language(CUDA)
+  if(DEFINED CMAKE_CXX_STANDARD)
+    set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
+  endif()
+  set(CMAKE_CUDA_STANDARD_REQUIRED ON)
 endif()
 
 # Full set of test files (you can override these; see below)
 set(PYBIND11_TEST_FILES
-  test_async.cpp
-  test_buffers.cpp
-  test_builtin_casters.cpp
-  test_call_policies.cpp
-  test_callbacks.cpp
-  test_chrono.cpp
-  test_class.cpp
-  test_constants_and_functions.cpp
-  test_copy_move.cpp
-  test_docstring_options.cpp
-  test_eigen.cpp
-  test_enum.cpp
-  test_eval.cpp
-  test_exceptions.cpp
-  test_factory_constructors.cpp
-  test_gil_scoped.cpp
-  test_iostream.cpp
-  test_kwargs_and_defaults.cpp
-  test_local_bindings.cpp
-  test_methods_and_attributes.cpp
-  test_modules.cpp
-  test_multiple_inheritance.cpp
-  test_numpy_array.cpp
-  test_numpy_dtypes.cpp
-  test_numpy_vectorize.cpp
-  test_opaque_types.cpp
-  test_operator_overloading.cpp
-  test_pickling.cpp
-  test_pytypes.cpp
-  test_sequences_and_iterators.cpp
-  test_smart_ptr.cpp
-  test_stl.cpp
-  test_stl_binders.cpp
-  test_tagbased_polymorphic.cpp
-  test_union.cpp
-  test_virtual_functions.cpp
-)
+    test_async.cpp
+    test_buffers.cpp
+    test_builtin_casters.cpp
+    test_call_policies.cpp
+    test_callbacks.cpp
+    test_chrono.cpp
+    test_class.cpp
+    test_constants_and_functions.cpp
+    test_copy_move.cpp
+    test_custom_type_casters.cpp
+    test_docstring_options.cpp
+    test_eigen.cpp
+    test_enum.cpp
+    test_eval.cpp
+    test_exceptions.cpp
+    test_factory_constructors.cpp
+    test_gil_scoped.cpp
+    test_iostream.cpp
+    test_kwargs_and_defaults.cpp
+    test_local_bindings.cpp
+    test_methods_and_attributes.cpp
+    test_modules.cpp
+    test_multiple_inheritance.cpp
+    test_numpy_array.cpp
+    test_numpy_dtypes.cpp
+    test_numpy_vectorize.cpp
+    test_opaque_types.cpp
+    test_operator_overloading.cpp
+    test_pickling.cpp
+    test_pytypes.cpp
+    test_sequences_and_iterators.cpp
+    test_smart_ptr.cpp
+    test_stl.cpp
+    test_stl_binders.cpp
+    test_tagbased_polymorphic.cpp
+    test_union.cpp
+    test_virtual_functions.cpp)
 
 # Invoking cmake with something like:
-#     cmake -DPYBIND11_TEST_OVERRIDE="test_callbacks.cpp;test_picking.cpp" ..
+#     cmake -DPYBIND11_TEST_OVERRIDE="test_callbacks.cpp;test_pickling.cpp" ..
 # lets you override the tests that get compiled and run.  You can restore to all tests with:
 #     cmake -DPYBIND11_TEST_OVERRIDE= ..
-if (PYBIND11_TEST_OVERRIDE)
+if(PYBIND11_TEST_OVERRIDE)
   set(PYBIND11_TEST_FILES ${PYBIND11_TEST_OVERRIDE})
 endif()
 
-# Skip test_async for Python < 3.5
-list(FIND PYBIND11_TEST_FILES test_async.cpp PYBIND11_TEST_FILES_ASYNC_I)
-if((PYBIND11_TEST_FILES_ASYNC_I GREATER -1) AND ("${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}" VERSION_LESS 3.5))
-  message(STATUS "Skipping test_async because Python version ${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR} < 3.5")
-  list(REMOVE_AT PYBIND11_TEST_FILES ${PYBIND11_TEST_FILES_ASYNC_I})
+# You can also filter tests:
+if(PYBIND11_TEST_FILTER)
+  pybind11_filter_tests(PYBIND11_TEST_FILES ${PYBIND11_TEST_FILTER})
+endif()
+
+if(PYTHON_VERSION VERSION_LESS 3.5)
+  pybind11_filter_tests(PYBIND11_TEST_FILES test_async.cpp MESSAGE
+                        "Skipping test_async on Python 2")
+endif()
+
+# Skip tests for CUDA check:
+# /pybind11/tests/test_constants_and_functions.cpp(125):
+#   error: incompatible exception specifications
+if(PYBIND11_CUDA_TESTS)
+  pybind11_filter_tests(
+    PYBIND11_TEST_FILES test_constants_and_functions.cpp MESSAGE
+    "Skipping test_constants_and_functions due to incompatible exception specifications")
 endif()
 
 string(REPLACE ".cpp" ".py" PYBIND11_PYTEST_FILES "${PYBIND11_TEST_FILES}")
@@ -84,16 +156,10 @@
 # Contains the set of test files that require pybind11_cross_module_tests to be
 # built; if none of these are built (i.e. because TEST_OVERRIDE is used and
 # doesn't include them) the second module doesn't get built.
-set(PYBIND11_CROSS_MODULE_TESTS
-  test_exceptions.py
-  test_local_bindings.py
-  test_stl.py
-  test_stl_binders.py
-)
+set(PYBIND11_CROSS_MODULE_TESTS test_exceptions.py test_local_bindings.py test_stl.py
+                                test_stl_binders.py)
 
-set(PYBIND11_CROSS_MODULE_GIL_TESTS
-  test_gil_scoped.py
-)
+set(PYBIND11_CROSS_MODULE_GIL_TESTS test_gil_scoped.py)
 
 # Check if Eigen is available; if not, remove from PYBIND11_TEST_FILES (but
 # keep it in PYBIND11_PYTEST_FILES, so that we get the "eigen is not installed"
@@ -103,21 +169,45 @@
   # Try loading via newer Eigen's Eigen3Config first (bypassing tools/FindEigen3.cmake).
   # Eigen 3.3.1+ exports a cmake 3.0+ target for handling dependency requirements, but also
   # produces a fatal error if loaded from a pre-3.0 cmake.
-  if (NOT CMAKE_VERSION VERSION_LESS 3.0)
-    find_package(Eigen3 3.2.7 QUIET CONFIG)
-    if (EIGEN3_FOUND)
-      if (EIGEN3_VERSION_STRING AND NOT EIGEN3_VERSION_STRING VERSION_LESS 3.3.1)
-        set(PYBIND11_EIGEN_VIA_TARGET 1)
-      endif()
+  if(DOWNLOAD_EIGEN)
+    if(CMAKE_VERSION VERSION_LESS 3.11)
+      message(FATAL_ERROR "CMake 3.11+ required when using DOWNLOAD_EIGEN")
     endif()
-  endif()
-  if (NOT EIGEN3_FOUND)
-    # Couldn't load via target, so fall back to allowing module mode finding, which will pick up
-    # tools/FindEigen3.cmake
-    find_package(Eigen3 3.2.7 QUIET)
+
+    set(EIGEN3_VERSION_STRING "3.3.8")
+
+    include(FetchContent)
+    FetchContent_Declare(
+      eigen
+      GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
+      GIT_TAG ${EIGEN3_VERSION_STRING})
+
+    FetchContent_GetProperties(eigen)
+    if(NOT eigen_POPULATED)
+      message(STATUS "Downloading Eigen")
+      FetchContent_Populate(eigen)
+    endif()
+
+    set(EIGEN3_INCLUDE_DIR ${eigen_SOURCE_DIR})
+    set(EIGEN3_FOUND TRUE)
+
+  else()
+    find_package(Eigen3 3.2.7 QUIET CONFIG)
+
+    if(NOT EIGEN3_FOUND)
+      # Couldn't load via target, so fall back to allowing module mode finding, which will pick up
+      # tools/FindEigen3.cmake
+      find_package(Eigen3 3.2.7 QUIET)
+    endif()
   endif()
 
   if(EIGEN3_FOUND)
+    if(NOT TARGET Eigen3::Eigen)
+      add_library(Eigen3::Eigen IMPORTED INTERFACE)
+      set_property(TARGET Eigen3::Eigen PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+                                                 "${EIGEN3_INCLUDE_DIR}")
+    endif()
+
     # Eigen 3.3.1+ cmake sets EIGEN3_VERSION_STRING (and hard codes the version when installed
     # rather than looking it up in the cmake script); older versions, and the
     # tools/FindEigen3.cmake, set EIGEN3_VERSION instead.
@@ -127,28 +217,63 @@
     message(STATUS "Building tests with Eigen v${EIGEN3_VERSION}")
   else()
     list(REMOVE_AT PYBIND11_TEST_FILES ${PYBIND11_TEST_FILES_EIGEN_I})
-    message(STATUS "Building tests WITHOUT Eigen")
+    message(STATUS "Building tests WITHOUT Eigen, use -DDOWNLOAD_EIGEN on CMake 3.11+ to download")
   endif()
 endif()
 
 # Optional dependency for some tests (boost::variant is only supported with version >= 1.56)
 find_package(Boost 1.56)
 
+if(Boost_FOUND)
+  if(NOT TARGET Boost::headers)
+    add_library(Boost::headers IMPORTED INTERFACE)
+    if(TARGET Boost::boost)
+      # Classic FindBoost: forward the Boost::headers shim to the Boost::boost header target
+      set_property(TARGET Boost::headers PROPERTY INTERFACE_LINK_LIBRARIES Boost::boost)
+    else()
+      # Very old FindBoost, or newer Boost than CMake in older CMakes
+      set_property(TARGET Boost::headers PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+                                                  ${Boost_INCLUDE_DIRS})
+    endif()
+  endif()
+endif()
+
 # Compile with compiler warnings turned on
 function(pybind11_enable_warnings target_name)
   if(MSVC)
     target_compile_options(${target_name} PRIVATE /W4)
-  elseif(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Intel|Clang)")
-      target_compile_options(${target_name} PRIVATE -Wall -Wextra -Wconversion -Wcast-qual -Wdeprecated)
+  elseif(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Intel|Clang)" AND NOT PYBIND11_CUDA_TESTS)
+    target_compile_options(
+      ${target_name}
+      PRIVATE -Wall
+              -Wextra
+              -Wconversion
+              -Wcast-qual
+              -Wdeprecated
+              -Wundef
+              -Wnon-virtual-dtor)
   endif()
 
   if(PYBIND11_WERROR)
     if(MSVC)
       target_compile_options(${target_name} PRIVATE /WX)
+    elseif(PYBIND11_CUDA_TESTS)
+      target_compile_options(${target_name} PRIVATE "SHELL:-Werror all-warnings")
     elseif(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Intel|Clang)")
       target_compile_options(${target_name} PRIVATE -Werror)
     endif()
   endif()
+
+  # Needs to be re-added since the ordering requires these to be after the ones above
+  if(CMAKE_CXX_STANDARD
+     AND CMAKE_CXX_COMPILER_ID MATCHES "Clang"
+     AND PYTHON_VERSION VERSION_LESS 3.0)
+    if(CMAKE_CXX_STANDARD LESS 17)
+      target_compile_options(${target_name} PUBLIC -Wno-deprecated-register)
+    else()
+      target_compile_options(${target_name} PUBLIC -Wno-register)
+    endif()
+  endif()
 endfunction()
 
 set(test_targets pybind11_tests)
@@ -156,7 +281,7 @@
 # Build pybind11_cross_module_tests if any test_whatever.py are being built that require it
 foreach(t ${PYBIND11_CROSS_MODULE_TESTS})
   list(FIND PYBIND11_PYTEST_FILES ${t} i)
-  if (i GREATER -1)
+  if(i GREATER -1)
     list(APPEND test_targets pybind11_cross_module_tests)
     break()
   endif()
@@ -164,78 +289,101 @@
 
 foreach(t ${PYBIND11_CROSS_MODULE_GIL_TESTS})
   list(FIND PYBIND11_PYTEST_FILES ${t} i)
-  if (i GREATER -1)
+  if(i GREATER -1)
     list(APPEND test_targets cross_module_gil_utils)
     break()
   endif()
 endforeach()
 
-set(testdir ${CMAKE_CURRENT_SOURCE_DIR})
+# Support CUDA testing by forcing the target file to compile with NVCC
+if(PYBIND11_CUDA_TESTS)
+  set_property(SOURCE ${PYBIND11_TEST_FILES} PROPERTY LANGUAGE CUDA)
+endif()
+
 foreach(target ${test_targets})
   set(test_files ${PYBIND11_TEST_FILES})
-  if(NOT target STREQUAL "pybind11_tests")
+  if(NOT "${target}" STREQUAL "pybind11_tests")
     set(test_files "")
   endif()
 
+  # Support CUDA testing by forcing the target file to compile with NVCC
+  if(PYBIND11_CUDA_TESTS)
+    set_property(SOURCE ${target}.cpp PROPERTY LANGUAGE CUDA)
+  endif()
+
   # Create the binding library
   pybind11_add_module(${target} THIN_LTO ${target}.cpp ${test_files} ${PYBIND11_HEADERS})
   pybind11_enable_warnings(${target})
 
+  if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
+    get_property(
+      suffix
+      TARGET ${target}
+      PROPERTY SUFFIX)
+    set(source_output "${CMAKE_CURRENT_SOURCE_DIR}/${target}${suffix}")
+    if(suffix AND EXISTS "${source_output}")
+      message(WARNING "Output file also in source directory; "
+                      "please remove to avoid confusion: ${source_output}")
+    endif()
+  endif()
+
   if(MSVC)
     target_compile_options(${target} PRIVATE /utf-8)
   endif()
 
   if(EIGEN3_FOUND)
-    if (PYBIND11_EIGEN_VIA_TARGET)
-      target_link_libraries(${target} PRIVATE Eigen3::Eigen)
-    else()
-      target_include_directories(${target} PRIVATE ${EIGEN3_INCLUDE_DIR})
-    endif()
+    target_link_libraries(${target} PRIVATE Eigen3::Eigen)
     target_compile_definitions(${target} PRIVATE -DPYBIND11_TEST_EIGEN)
   endif()
 
   if(Boost_FOUND)
-    target_include_directories(${target} PRIVATE ${Boost_INCLUDE_DIRS})
+    target_link_libraries(${target} PRIVATE Boost::headers)
     target_compile_definitions(${target} PRIVATE -DPYBIND11_TEST_BOOST)
   endif()
 
   # Always write the output file directly into the 'tests' directory (even on MSVC)
   if(NOT CMAKE_LIBRARY_OUTPUT_DIRECTORY)
-    set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${testdir})
+    set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
+                                               "${CMAKE_CURRENT_BINARY_DIR}")
     foreach(config ${CMAKE_CONFIGURATION_TYPES})
       string(TOUPPER ${config} config)
-      set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${config} ${testdir})
+      set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${config}
+                                                 "${CMAKE_CURRENT_BINARY_DIR}")
     endforeach()
   endif()
 endforeach()
 
-# Make sure pytest is found or produce a fatal error
-if(NOT PYBIND11_PYTEST_FOUND)
-  execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import pytest; print(pytest.__version__)"
-                  RESULT_VARIABLE pytest_not_found OUTPUT_VARIABLE pytest_version ERROR_QUIET)
-  if(pytest_not_found)
-    message(FATAL_ERROR "Running the tests requires pytest. Please install it manually"
-                        " (try: ${PYTHON_EXECUTABLE} -m pip install pytest)")
-  elseif(pytest_version VERSION_LESS 3.0)
-    message(FATAL_ERROR "Running the tests requires pytest >= 3.0. Found: ${pytest_version}"
-                        "Please update it (try: ${PYTHON_EXECUTABLE} -m pip install -U pytest)")
-  endif()
-  set(PYBIND11_PYTEST_FOUND TRUE CACHE INTERNAL "")
+# Make sure pytest is found or produce a warning
+pybind11_find_import(pytest VERSION 3.1)
+
+if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
+  # This is not used later in the build, so it's okay to regenerate each time.
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/pytest.ini" "${CMAKE_CURRENT_BINARY_DIR}/pytest.ini"
+                 COPYONLY)
+  file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/pytest.ini"
+       "\ntestpaths = \"${CMAKE_CURRENT_SOURCE_DIR}\"")
+
 endif()
 
-if(CMAKE_VERSION VERSION_LESS 3.2)
-  set(PYBIND11_USES_TERMINAL "")
-else()
-  set(PYBIND11_USES_TERMINAL "USES_TERMINAL")
-endif()
+# CMake 3.12 added list(TRANSFORM <list> PREPEND ...),
+# but we can't use it yet
+string(REPLACE "test_" "${CMAKE_CURRENT_SOURCE_DIR}/test_" PYBIND11_ABS_PYTEST_FILES
+               "${PYBIND11_PYTEST_FILES}")
 
 # A single command to compile and run the tests
-add_custom_target(pytest COMMAND ${PYTHON_EXECUTABLE} -m pytest ${PYBIND11_PYTEST_FILES}
-                  DEPENDS ${test_targets} WORKING_DIRECTORY ${testdir} ${PYBIND11_USES_TERMINAL})
+add_custom_target(
+  pytest
+  COMMAND ${PYTHON_EXECUTABLE} -m pytest ${PYBIND11_ABS_PYTEST_FILES}
+  DEPENDS ${test_targets}
+  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+  USES_TERMINAL)
 
 if(PYBIND11_TEST_OVERRIDE)
-  add_custom_command(TARGET pytest POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E echo "Note: not all tests run: -DPYBIND11_TEST_OVERRIDE is in effect")
+  add_custom_command(
+    TARGET pytest
+    POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E echo
+            "Note: not all tests run: -DPYBIND11_TEST_OVERRIDE is in effect")
 endif()
 
 # Add a check target to run all the tests, starting with pytest (we add dependencies to this below)
@@ -243,17 +391,23 @@
 
 # The remaining tests only apply when being built as part of the pybind11 project, but not if the
 # tests are being built independently.
-if (NOT PROJECT_NAME STREQUAL "pybind11")
+if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
   return()
 endif()
 
 # Add a post-build comment to show the primary test suite .so size and, if a previous size, compare it:
-add_custom_command(TARGET pybind11_tests POST_BUILD
-  COMMAND ${PYTHON_EXECUTABLE} ${PROJECT_SOURCE_DIR}/tools/libsize.py
-  $<TARGET_FILE:pybind11_tests> ${CMAKE_CURRENT_BINARY_DIR}/sosize-$<TARGET_FILE_NAME:pybind11_tests>.txt)
+add_custom_command(
+  TARGET pybind11_tests
+  POST_BUILD
+  COMMAND
+    ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/../tools/libsize.py
+    $<TARGET_FILE:pybind11_tests>
+    ${CMAKE_CURRENT_BINARY_DIR}/sosize-$<TARGET_FILE_NAME:pybind11_tests>.txt)
 
-# Test embedding the interpreter. Provides the `cpptest` target.
-add_subdirectory(test_embed)
+if(NOT PYBIND11_CUDA_TESTS)
+  # Test embedding the interpreter. Provides the `cpptest` target.
+  add_subdirectory(test_embed)
 
-# Test CMake build using functions and targets from subdirectory or installed location
-add_subdirectory(test_cmake_build)
+  # Test CMake build using functions and targets from subdirectory or installed location
+  add_subdirectory(test_cmake_build)
+endif()
diff --git a/tests/conftest.py b/tests/conftest.py
index 57f681c..362eb80 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,31 +1,36 @@
+# -*- coding: utf-8 -*-
 """pytest configuration
 
 Extends output capture as needed by pybind11: ignore constructors, optional unordered lines.
 Adds docstring and exceptions message sanitizers: ignore Python 2 vs 3 differences.
 """
 
-import pytest
-import textwrap
-import difflib
-import re
-import sys
 import contextlib
-import platform
+import difflib
 import gc
+import re
+import textwrap
 
-_unicode_marker = re.compile(r'u(\'[^\']*\')')
-_long_marker = re.compile(r'([0-9])L')
-_hexadecimal = re.compile(r'0x[0-9a-fA-F]+')
+import pytest
 
-# test_async.py requires support for async and await
+import env
+
+# Early diagnostic for failed imports
+import pybind11_tests  # noqa: F401
+
+_unicode_marker = re.compile(r"u(\'[^\']*\')")
+_long_marker = re.compile(r"([0-9])L")
+_hexadecimal = re.compile(r"0x[0-9a-fA-F]+")
+
+# Avoid collecting Python3 only files
 collect_ignore = []
-if sys.version_info[:2] < (3, 5):
+if env.PY2:
     collect_ignore.append("test_async.py")
 
 
 def _strip_and_dedent(s):
     """For triple-quote strings"""
-    return textwrap.dedent(s.lstrip('\n').rstrip())
+    return textwrap.dedent(s.lstrip("\n").rstrip())
 
 
 def _split_and_sort(s):
@@ -35,11 +40,14 @@
 
 def _make_explanation(a, b):
     """Explanation for a failed assert -- the a and b arguments are List[str]"""
-    return ["--- actual / +++ expected"] + [line.strip('\n') for line in difflib.ndiff(a, b)]
+    return ["--- actual / +++ expected"] + [
+        line.strip("\n") for line in difflib.ndiff(a, b)
+    ]
 
 
 class Output(object):
     """Basic output post-processing and comparison"""
+
     def __init__(self, string):
         self.string = string
         self.explanation = []
@@ -49,7 +57,11 @@
 
     def __eq__(self, other):
         # Ignore constructor/destructor output which is prefixed with "###"
-        a = [line for line in self.string.strip().splitlines() if not line.startswith("###")]
+        a = [
+            line
+            for line in self.string.strip().splitlines()
+            if not line.startswith("###")
+        ]
         b = _strip_and_dedent(other).splitlines()
         if a == b:
             return True
@@ -60,6 +72,7 @@
 
 class Unordered(Output):
     """Custom comparison for output without strict line ordering"""
+
     def __eq__(self, other):
         a = _split_and_sort(self.string)
         b = _split_and_sort(other)
@@ -170,7 +183,7 @@
 # noinspection PyUnusedLocal
 def pytest_assertrepr_compare(op, left, right):
     """Hook to insert custom failure explanation"""
-    if hasattr(left, 'explanation'):
+    if hasattr(left, "explanation"):
         return left.explanation
 
 
@@ -184,61 +197,12 @@
 
 
 def gc_collect():
-    ''' Run the garbage collector twice (needed when running
-    reference counting tests with PyPy) '''
+    """Run the garbage collector twice (needed when running
+    reference counting tests with PyPy)"""
     gc.collect()
     gc.collect()
 
 
 def pytest_configure():
-    """Add import suppression and test requirements to `pytest` namespace"""
-    try:
-        import numpy as np
-    except ImportError:
-        np = None
-    try:
-        import scipy
-    except ImportError:
-        scipy = None
-    try:
-        from pybind11_tests.eigen import have_eigen
-    except ImportError:
-        have_eigen = False
-    pypy = platform.python_implementation() == "PyPy"
-
-    skipif = pytest.mark.skipif
     pytest.suppress = suppress
-    pytest.requires_numpy = skipif(not np, reason="numpy is not installed")
-    pytest.requires_scipy = skipif(not np, reason="scipy is not installed")
-    pytest.requires_eigen_and_numpy = skipif(not have_eigen or not np,
-                                             reason="eigen and/or numpy are not installed")
-    pytest.requires_eigen_and_scipy = skipif(
-        not have_eigen or not scipy, reason="eigen and/or scipy are not installed")
-    pytest.unsupported_on_pypy = skipif(pypy, reason="unsupported on PyPy")
-    pytest.unsupported_on_py2 = skipif(sys.version_info.major < 3,
-                                       reason="unsupported on Python 2.x")
     pytest.gc_collect = gc_collect
-
-
-def _test_import_pybind11():
-    """Early diagnostic for test module initialization errors
-
-    When there is an error during initialization, the first import will report the
-    real error while all subsequent imports will report nonsense. This import test
-    is done early (in the pytest configuration file, before any tests) in order to
-    avoid the noise of having all tests fail with identical error messages.
-
-    Any possible exception is caught here and reported manually *without* the stack
-    trace. This further reduces noise since the trace would only show pytest internals
-    which are not useful for debugging pybind11 module issues.
-    """
-    # noinspection PyBroadException
-    try:
-        import pybind11_tests  # noqa: F401 imported but unused
-    except Exception as e:
-        print("Failed to import pybind11_tests from pytest:")
-        print("  {}: {}".format(type(e).__name__, e))
-        sys.exit(1)
-
-
-_test_import_pybind11()
diff --git a/tests/constructor_stats.h b/tests/constructor_stats.h
index 431e5ac..805968a 100644
--- a/tests/constructor_stats.h
+++ b/tests/constructor_stats.h
@@ -120,7 +120,7 @@
             throw py::error_already_set();
         Py_DECREF(result);
 #else
-        py::module::import("gc").attr("collect")();
+        py::module_::import("gc").attr("collect")();
 #endif
     }
 
@@ -273,4 +273,3 @@
     print_constr_details(inst, ":", values...);
     track_values(inst, values...);
 }
-
diff --git a/tests/env.py b/tests/env.py
new file mode 100644
index 0000000..5cded44
--- /dev/null
+++ b/tests/env.py
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+import platform
+import sys
+
+LINUX = sys.platform.startswith("linux")
+MACOS = sys.platform.startswith("darwin")
+WIN = sys.platform.startswith("win32") or sys.platform.startswith("cygwin")
+
+CPYTHON = platform.python_implementation() == "CPython"
+PYPY = platform.python_implementation() == "PyPy"
+
+PY2 = sys.version_info.major == 2
+
+PY = sys.version_info
diff --git a/tests/extra_python_package/pytest.ini b/tests/extra_python_package/pytest.ini
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/extra_python_package/pytest.ini
diff --git a/tests/extra_python_package/test_files.py b/tests/extra_python_package/test_files.py
new file mode 100644
index 0000000..cbd4bff
--- /dev/null
+++ b/tests/extra_python_package/test_files.py
@@ -0,0 +1,262 @@
+# -*- coding: utf-8 -*-
+import contextlib
+import os
+import string
+import subprocess
+import sys
+import tarfile
+import zipfile
+
+# These tests must be run explicitly
+# They require CMake 3.15+ (--install)
+
+DIR = os.path.abspath(os.path.dirname(__file__))
+MAIN_DIR = os.path.dirname(os.path.dirname(DIR))
+
+
+main_headers = {
+    "include/pybind11/attr.h",
+    "include/pybind11/buffer_info.h",
+    "include/pybind11/cast.h",
+    "include/pybind11/chrono.h",
+    "include/pybind11/common.h",
+    "include/pybind11/complex.h",
+    "include/pybind11/eigen.h",
+    "include/pybind11/embed.h",
+    "include/pybind11/eval.h",
+    "include/pybind11/functional.h",
+    "include/pybind11/iostream.h",
+    "include/pybind11/numpy.h",
+    "include/pybind11/operators.h",
+    "include/pybind11/options.h",
+    "include/pybind11/pybind11.h",
+    "include/pybind11/pytypes.h",
+    "include/pybind11/stl.h",
+    "include/pybind11/stl_bind.h",
+}
+
+detail_headers = {
+    "include/pybind11/detail/class.h",
+    "include/pybind11/detail/common.h",
+    "include/pybind11/detail/descr.h",
+    "include/pybind11/detail/init.h",
+    "include/pybind11/detail/internals.h",
+    "include/pybind11/detail/typeid.h",
+}
+
+cmake_files = {
+    "share/cmake/pybind11/FindPythonLibsNew.cmake",
+    "share/cmake/pybind11/pybind11Common.cmake",
+    "share/cmake/pybind11/pybind11Config.cmake",
+    "share/cmake/pybind11/pybind11ConfigVersion.cmake",
+    "share/cmake/pybind11/pybind11NewTools.cmake",
+    "share/cmake/pybind11/pybind11Targets.cmake",
+    "share/cmake/pybind11/pybind11Tools.cmake",
+}
+
+py_files = {
+    "__init__.py",
+    "__main__.py",
+    "_version.py",
+    "_version.pyi",
+    "commands.py",
+    "py.typed",
+    "setup_helpers.py",
+    "setup_helpers.pyi",
+}
+
+headers = main_headers | detail_headers
+src_files = headers | cmake_files
+all_files = src_files | py_files
+
+
+sdist_files = {
+    "pybind11",
+    "pybind11/include",
+    "pybind11/include/pybind11",
+    "pybind11/include/pybind11/detail",
+    "pybind11/share",
+    "pybind11/share/cmake",
+    "pybind11/share/cmake/pybind11",
+    "pyproject.toml",
+    "setup.cfg",
+    "setup.py",
+    "LICENSE",
+    "MANIFEST.in",
+    "README.rst",
+    "PKG-INFO",
+}
+
+local_sdist_files = {
+    ".egg-info",
+    ".egg-info/PKG-INFO",
+    ".egg-info/SOURCES.txt",
+    ".egg-info/dependency_links.txt",
+    ".egg-info/not-zip-safe",
+    ".egg-info/top_level.txt",
+}
+
+
+def test_build_sdist(monkeypatch, tmpdir):
+    """Build the main sdist and check its file list, setup.py and pyproject.toml."""
+    monkeypatch.chdir(MAIN_DIR)
+
+    out = subprocess.check_output(
+        [
+            sys.executable,
+            "setup.py",
+            "sdist",
+            "--formats=tar",
+            "--dist-dir",
+            str(tmpdir),
+        ]
+    )
+    if hasattr(out, "decode"):
+        out = out.decode()
+
+    (sdist,) = tmpdir.visit("*.tar")  # exactly one tarball expected
+
+    with tarfile.open(str(sdist)) as tar:
+        start = tar.getnames()[0] + "/"  # first archive member plus "/"
+        version = start[9:-1]  # presumably drops "pybind11-" and trailing "/"
+        simpler = set(n.split("/", 1)[-1] for n in tar.getnames()[1:])
+
+        with contextlib.closing(
+            tar.extractfile(tar.getmember(start + "setup.py"))
+        ) as f:
+            setup_py = f.read()
+
+        with contextlib.closing(
+            tar.extractfile(tar.getmember(start + "pyproject.toml"))
+        ) as f:
+            pyproject_toml = f.read()
+
+    files = set("pybind11/{}".format(n) for n in all_files)
+    files |= sdist_files
+    files |= set("pybind11{}".format(n) for n in local_sdist_files)
+    files.add("pybind11.egg-info/entry_points.txt")
+    files.add("pybind11.egg-info/requires.txt")
+    assert simpler == files  # archive names with the top-level dir stripped
+
+    with open(os.path.join(MAIN_DIR, "tools", "setup_main.py.in"), "rb") as f:
+        contents = (
+            string.Template(f.read().decode())
+            .substitute(version=version, extra_cmd="")
+            .encode()
+        )
+        assert setup_py == contents  # packaged setup.py is the rendered template
+
+    with open(os.path.join(MAIN_DIR, "tools", "pyproject.toml"), "rb") as f:
+        contents = f.read()
+        assert pyproject_toml == contents
+
+
+def test_build_global_dist(monkeypatch, tmpdir):
+    """Build the global sdist and check its file list, setup.py and pyproject.toml."""
+    monkeypatch.chdir(MAIN_DIR)
+    monkeypatch.setenv("PYBIND11_GLOBAL_SDIST", "1")
+
+    out = subprocess.check_output(
+        [
+            sys.executable,
+            "setup.py",
+            "sdist",
+            "--formats=tar",
+            "--dist-dir",
+            str(tmpdir),
+        ]
+    )
+    if hasattr(out, "decode"):
+        out = out.decode()
+
+    (sdist,) = tmpdir.visit("*.tar")  # exactly one tarball expected
+
+    with tarfile.open(str(sdist)) as tar:
+        start = tar.getnames()[0] + "/"  # first archive member plus "/"
+        version = start[16:-1]  # presumably drops "pybind11_global-" and trailing "/"
+        simpler = set(n.split("/", 1)[-1] for n in tar.getnames()[1:])
+
+        with contextlib.closing(
+            tar.extractfile(tar.getmember(start + "setup.py"))
+        ) as f:
+            setup_py = f.read()
+
+        with contextlib.closing(
+            tar.extractfile(tar.getmember(start + "pyproject.toml"))
+        ) as f:
+            pyproject_toml = f.read()
+
+    files = set("pybind11/{}".format(n) for n in all_files)
+    files |= sdist_files
+    files |= set("pybind11_global{}".format(n) for n in local_sdist_files)
+    assert simpler == files  # archive names with the top-level dir stripped
+
+    with open(os.path.join(MAIN_DIR, "tools", "setup_global.py.in"), "rb") as f:
+        contents = (
+            string.Template(f.read().decode())
+            .substitute(version=version, extra_cmd="")
+            .encode()
+        )
+        assert setup_py == contents  # packaged setup.py is the rendered template
+
+    with open(os.path.join(MAIN_DIR, "tools", "pyproject.toml"), "rb") as f:
+        contents = f.read()
+        assert pyproject_toml == contents
+
+
+def tests_build_wheel(monkeypatch, tmpdir):
+    monkeypatch.chdir(MAIN_DIR)
+
+    subprocess.check_output(
+        [sys.executable, "-m", "pip", "wheel", ".", "-w", str(tmpdir)]
+    )
+
+    (wheel,) = tmpdir.visit("*.whl")
+
+    files = set("pybind11/{}".format(n) for n in all_files)
+    files |= {
+        "dist-info/LICENSE",
+        "dist-info/METADATA",
+        "dist-info/RECORD",
+        "dist-info/WHEEL",
+        "dist-info/entry_points.txt",
+        "dist-info/top_level.txt",
+    }
+
+    with zipfile.ZipFile(str(wheel)) as z:
+        names = z.namelist()
+
+    trimmed = set(n for n in names if "dist-info" not in n)
+    trimmed |= set(
+        "dist-info/{}".format(n.split("/", 1)[-1]) for n in names if "dist-info" in n
+    )
+    assert files == trimmed
+
+
+def tests_build_global_wheel(monkeypatch, tmpdir):
+    monkeypatch.chdir(MAIN_DIR)
+    monkeypatch.setenv("PYBIND11_GLOBAL_SDIST", "1")
+
+    subprocess.check_output(
+        [sys.executable, "-m", "pip", "wheel", ".", "-w", str(tmpdir)]
+    )
+
+    (wheel,) = tmpdir.visit("*.whl")
+
+    files = set("data/data/{}".format(n) for n in src_files)
+    files |= set("data/headers/{}".format(n[8:]) for n in headers)
+    files |= {
+        "dist-info/LICENSE",
+        "dist-info/METADATA",
+        "dist-info/WHEEL",
+        "dist-info/top_level.txt",
+        "dist-info/RECORD",
+    }
+
+    with zipfile.ZipFile(str(wheel)) as z:
+        names = z.namelist()
+
+    beginning = names[0].split("/", 1)[0].rsplit(".", 1)[0]
+    trimmed = set(n[len(beginning) + 1 :] for n in names)
+
+    assert files == trimmed
diff --git a/tests/extra_setuptools/pytest.ini b/tests/extra_setuptools/pytest.ini
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/extra_setuptools/pytest.ini
diff --git a/tests/extra_setuptools/test_setuphelper.py b/tests/extra_setuptools/test_setuphelper.py
new file mode 100644
index 0000000..0d8bd0e
--- /dev/null
+++ b/tests/extra_setuptools/test_setuphelper.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+import os
+import sys
+import subprocess
+from textwrap import dedent
+
+import pytest
+
+DIR = os.path.abspath(os.path.dirname(__file__))
+MAIN_DIR = os.path.dirname(os.path.dirname(DIR))
+
+
+@pytest.mark.parametrize("parallel", [False, True])
+@pytest.mark.parametrize("std", [11, 0])
+def test_simple_setup_py(monkeypatch, tmpdir, parallel, std):
+    monkeypatch.chdir(tmpdir)
+    monkeypatch.syspath_prepend(MAIN_DIR)
+
+    (tmpdir / "setup.py").write_text(
+        dedent(
+            u"""\
+            import sys
+            sys.path.append({MAIN_DIR!r})
+
+            from setuptools import setup, Extension
+            from pybind11.setup_helpers import build_ext, Pybind11Extension
+
+            std = {std}
+
+            ext_modules = [
+                Pybind11Extension(
+                    "simple_setup",
+                    sorted(["main.cpp"]),
+                    cxx_std=std,
+                ),
+            ]
+
+            cmdclass = dict()
+            if std == 0:
+                cmdclass["build_ext"] = build_ext
+
+
+            parallel = {parallel}
+            if parallel:
+                from pybind11.setup_helpers import ParallelCompile
+                ParallelCompile().install()
+
+            setup(
+                name="simple_setup_package",
+                cmdclass=cmdclass,
+                ext_modules=ext_modules,
+            )
+            """
+        ).format(MAIN_DIR=MAIN_DIR, std=std, parallel=parallel),
+        encoding="ascii",
+    )
+
+    (tmpdir / "main.cpp").write_text(
+        dedent(
+            u"""\
+            #include <pybind11/pybind11.h>
+
+            int f(int x) {
+                return x * 3;
+            }
+            PYBIND11_MODULE(simple_setup, m) {
+                m.def("f", &f);
+            }
+            """
+        ),
+        encoding="ascii",
+    )
+
+    subprocess.check_call(
+        [sys.executable, "setup.py", "build_ext", "--inplace"],
+        stdout=sys.stdout,
+        stderr=sys.stderr,
+    )
+
+    # Debug helper printout, normally hidden
+    for item in tmpdir.listdir():
+        print(item.basename)
+
+    assert (
+        len([f for f in tmpdir.listdir() if f.basename.startswith("simple_setup")]) == 1
+    )
+    assert len(list(tmpdir.listdir())) == 4  # two files + output + build_dir
+
+    (tmpdir / "test.py").write_text(
+        dedent(
+            u"""\
+            import simple_setup
+            assert simple_setup.f(3) == 9
+            """
+        ),
+        encoding="ascii",
+    )
+
+    subprocess.check_call(
+        [sys.executable, "test.py"], stdout=sys.stdout, stderr=sys.stderr
+    )
diff --git a/tests/local_bindings.h b/tests/local_bindings.h
index b6afb80..22537b1 100644
--- a/tests/local_bindings.h
+++ b/tests/local_bindings.h
@@ -58,7 +58,7 @@
     std::string name_;
     const std::string &name() { return name_; }
 };
-}
+} // namespace pets
 
 struct MixGL { int i; MixGL(int i) : i{i} {} };
 struct MixGL2 { int i; MixGL2(int i) : i{i} {} };
diff --git a/tests/pybind11_tests.cpp b/tests/pybind11_tests.cpp
index bc7d2c3..439cd40 100644
--- a/tests/pybind11_tests.cpp
+++ b/tests/pybind11_tests.cpp
@@ -26,23 +26,23 @@
 Instead, see the "How can I reduce the build time?" question in the "Frequently asked questions"
 section of the documentation for good practice on splitting binding code over multiple files.
 */
-std::list<std::function<void(py::module &)>> &initializers() {
-    static std::list<std::function<void(py::module &)>> inits;
+std::list<std::function<void(py::module_ &)>> &initializers() {
+    static std::list<std::function<void(py::module_ &)>> inits;
     return inits;
 }
 
 test_initializer::test_initializer(Initializer init) {
-    initializers().push_back(init);
+    initializers().emplace_back(init);
 }
 
 test_initializer::test_initializer(const char *submodule_name, Initializer init) {
-    initializers().push_back([=](py::module &parent) {
+    initializers().emplace_back([=](py::module_ &parent) {
         auto m = parent.def_submodule(submodule_name);
         init(m);
     });
 }
 
-void bind_ConstructorStats(py::module &m) {
+void bind_ConstructorStats(py::module_ &m) {
     py::class_<ConstructorStats>(m, "ConstructorStats")
         .def("alive", &ConstructorStats::alive)
         .def("values", &ConstructorStats::values)
@@ -88,6 +88,4 @@
 
     for (const auto &initializer : initializers())
         initializer(m);
-
-    if (!py::hasattr(m, "have_eigen")) m.attr("have_eigen") = false;
 }
diff --git a/tests/pybind11_tests.h b/tests/pybind11_tests.h
index 90963a5..4ff56c0 100644
--- a/tests/pybind11_tests.h
+++ b/tests/pybind11_tests.h
@@ -10,7 +10,7 @@
 using namespace pybind11::literals;
 
 class test_initializer {
-    using Initializer = void (*)(py::module &);
+    using Initializer = void (*)(py::module_ &);
 
 public:
     test_initializer(Initializer init);
@@ -18,9 +18,9 @@
 };
 
 #define TEST_SUBMODULE(name, variable)                   \
-    void test_submodule_##name(py::module &);            \
+    void test_submodule_##name(py::module_ &);            \
     test_initializer name(#name, test_submodule_##name); \
-    void test_submodule_##name(py::module &variable)
+    void test_submodule_##name(py::module_ &variable)
 
 
 /// Dummy type which is not exported anywhere -- something to trigger a conversion error
@@ -50,16 +50,22 @@
     IncType &operator=(IncType &&) = delete;
 };
 
+/// A simple union for basic testing
+union IntFloat {
+    int i;
+    float f;
+};
+
 /// Custom cast-only type that casts to a string "rvalue" or "lvalue" depending on the cast context.
 /// Used to test recursive casters (e.g. std::tuple, stl containers).
 struct RValueCaster {};
-NAMESPACE_BEGIN(pybind11)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(pybind11)
+PYBIND11_NAMESPACE_BEGIN(detail)
 template<> class type_caster<RValueCaster> {
 public:
     PYBIND11_TYPE_CASTER(RValueCaster, _("RValueCaster"));
     static handle cast(RValueCaster &&, return_value_policy, handle) { return py::str("rvalue").release(); }
     static handle cast(const RValueCaster &, return_value_policy, handle) { return py::str("lvalue").release(); }
 };
-NAMESPACE_END(detail)
-NAMESPACE_END(pybind11)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(pybind11)
diff --git a/tests/pytest.ini b/tests/pytest.ini
index f209964..c47cbe9 100644
--- a/tests/pytest.ini
+++ b/tests/pytest.ini
@@ -1,11 +1,14 @@
 [pytest]
-minversion = 3.0
-norecursedirs = test_cmake_build test_embed
+minversion = 3.1
+norecursedirs = test_* extra_*
+xfail_strict = True
 addopts =
     # show summary of skipped tests
     -rs
     # capture only Python print and C++ py::print, but not C output (low-level Python errors)
     --capture=sys
+    # enable all warnings
+    -Wa
 filterwarnings =
     # make warnings into errors but ignore certain third-party extension issues
     error
diff --git a/tests/requirements.txt b/tests/requirements.txt
new file mode 100644
index 0000000..80ed617
--- /dev/null
+++ b/tests/requirements.txt
@@ -0,0 +1,8 @@
+--extra-index-url https://antocuni.github.io/pypy-wheels/manylinux2010/
+numpy==1.16.6; python_version<"3.6" and sys_platform!="win32"
+numpy==1.18.0; platform_python_implementation=="PyPy" and sys_platform=="darwin" and python_version>="3.6"
+numpy==1.19.3; (platform_python_implementation!="PyPy" or sys_platform=="linux") and python_version>="3.6" and python_version<"3.10"
+pytest==4.6.9; python_version<"3.5"
+pytest==5.4.3; python_version>="3.5"
+scipy==1.2.3; (platform_python_implementation!="PyPy" or sys_platform=="linux") and python_version<"3.6"
+scipy==1.5.2; (platform_python_implementation!="PyPy" or sys_platform=="linux") and python_version>="3.6" and python_version<"3.9"
diff --git a/tests/test_async.cpp b/tests/test_async.cpp
index f0ad0d5..e6e01d7 100644
--- a/tests/test_async.cpp
+++ b/tests/test_async.cpp
@@ -18,7 +18,7 @@
         .def(py::init<>())
         .def("__await__", [](const SupportsAsync& self) -> py::object {
             static_cast<void>(self);
-            py::object loop = py::module::import("asyncio.events").attr("get_event_loop")();
+            py::object loop = py::module_::import("asyncio.events").attr("get_event_loop")();
             py::object f = loop.attr("create_future")();
             f.attr("set_result")(5);
             return f.attr("__await__")();
diff --git a/tests/test_async.py b/tests/test_async.py
index e1c959d..df4489c 100644
--- a/tests/test_async.py
+++ b/tests/test_async.py
@@ -1,6 +1,8 @@
-import asyncio
+# -*- coding: utf-8 -*-
 import pytest
-from pybind11_tests import async_module as m
+
+asyncio = pytest.importorskip("asyncio")
+m = pytest.importorskip("pybind11_tests.async_module")
 
 
 @pytest.fixture
diff --git a/tests/test_buffers.cpp b/tests/test_buffers.cpp
index 1bc67ff..46eabf3 100644
--- a/tests/test_buffers.cpp
+++ b/tests/test_buffers.cpp
@@ -9,12 +9,13 @@
 
 #include "pybind11_tests.h"
 #include "constructor_stats.h"
+#include <pybind11/stl.h>
 
 TEST_SUBMODULE(buffers, m) {
     // test_from_python / test_to_python:
     class Matrix {
     public:
-        Matrix(ssize_t rows, ssize_t cols) : m_rows(rows), m_cols(cols) {
+        Matrix(py::ssize_t rows, py::ssize_t cols) : m_rows(rows), m_cols(cols) {
             print_created(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
             m_data = new float[(size_t) (rows*cols)];
             memset(m_data, 0, sizeof(float) * (size_t) (rows * cols));
@@ -58,25 +59,25 @@
             return *this;
         }
 
-        float operator()(ssize_t i, ssize_t j) const {
+        float operator()(py::ssize_t i, py::ssize_t j) const {
             return m_data[(size_t) (i*m_cols + j)];
         }
 
-        float &operator()(ssize_t i, ssize_t j) {
+        float &operator()(py::ssize_t i, py::ssize_t j) {
             return m_data[(size_t) (i*m_cols + j)];
         }
 
         float *data() { return m_data; }
 
-        ssize_t rows() const { return m_rows; }
-        ssize_t cols() const { return m_cols; }
+        py::ssize_t rows() const { return m_rows; }
+        py::ssize_t cols() const { return m_cols; }
     private:
-        ssize_t m_rows;
-        ssize_t m_cols;
+        py::ssize_t m_rows;
+        py::ssize_t m_cols;
         float *m_data;
     };
     py::class_<Matrix>(m, "Matrix", py::buffer_protocol())
-        .def(py::init<ssize_t, ssize_t>())
+        .def(py::init<py::ssize_t, py::ssize_t>())
         /// Construct from a buffer
         .def(py::init([](py::buffer const b) {
             py::buffer_info info = b.request();
@@ -92,12 +93,12 @@
        .def("cols", &Matrix::cols)
 
         /// Bare bones interface
-       .def("__getitem__", [](const Matrix &m, std::pair<ssize_t, ssize_t> i) {
+       .def("__getitem__", [](const Matrix &m, std::pair<py::ssize_t, py::ssize_t> i) {
             if (i.first >= m.rows() || i.second >= m.cols())
                 throw py::index_error();
             return m(i.first, i.second);
         })
-       .def("__setitem__", [](Matrix &m, std::pair<ssize_t, ssize_t> i, float v) {
+       .def("__setitem__", [](Matrix &m, std::pair<py::ssize_t, py::ssize_t> i, float v) {
             if (i.first >= m.rows() || i.second >= m.cols())
                 throw py::index_error();
             m(i.first, i.second) = v;
@@ -117,11 +118,11 @@
     // test_inherited_protocol
     class SquareMatrix : public Matrix {
     public:
-        SquareMatrix(ssize_t n) : Matrix(n, n) { }
+        SquareMatrix(py::ssize_t n) : Matrix(n, n) { }
     };
     // Derived classes inherit the buffer protocol and the buffer access function
     py::class_<SquareMatrix, Matrix>(m, "SquareMatrix")
-        .def(py::init<ssize_t>());
+        .def(py::init<py::ssize_t>());
 
 
     // test_pointer_to_member_fn
@@ -192,4 +193,22 @@
         .def_readwrite("readonly", &BufferReadOnlySelect::readonly)
         .def_buffer(&BufferReadOnlySelect::get_buffer_info);
 
+    // Expose buffer_info for testing.
+    py::class_<py::buffer_info>(m, "buffer_info")
+        .def(py::init<>())
+        .def_readonly("itemsize", &py::buffer_info::itemsize)
+        .def_readonly("size", &py::buffer_info::size)
+        .def_readonly("format", &py::buffer_info::format)
+        .def_readonly("ndim", &py::buffer_info::ndim)
+        .def_readonly("shape", &py::buffer_info::shape)
+        .def_readonly("strides", &py::buffer_info::strides)
+        .def_readonly("readonly", &py::buffer_info::readonly)
+        .def("__repr__", [](py::handle self) {
+             return py::str("itemsize={0.itemsize!r}, size={0.size!r}, format={0.format!r}, ndim={0.ndim!r}, shape={0.shape!r}, strides={0.strides!r}, readonly={0.readonly!r}").format(self);
+        })
+        ;
+
+    m.def("get_buffer_info", [](py::buffer buffer) {
+        return buffer.request();
+    });
 }
diff --git a/tests/test_buffers.py b/tests/test_buffers.py
index bf7aaed..f0f3708 100644
--- a/tests/test_buffers.py
+++ b/tests/test_buffers.py
@@ -1,18 +1,16 @@
+# -*- coding: utf-8 -*-
 import io
 import struct
-import sys
+import ctypes
 
 import pytest
 
+import env  # noqa: F401
+
 from pybind11_tests import buffers as m
 from pybind11_tests import ConstructorStats
 
-PY3 = sys.version_info[0] >= 3
-
-pytestmark = pytest.requires_numpy
-
-with pytest.suppress(ImportError):
-    import numpy as np
+np = pytest.importorskip("numpy")
 
 
 def test_from_python():
@@ -38,9 +36,7 @@
     assert cstats.move_assignments == 0
 
 
-# PyPy: Memory leak in the "np.array(m, copy=False)" call
-# https://bitbucket.org/pypy/pypy/issues/2444
-@pytest.unsupported_on_pypy
+# PyPy np.array(m, copy=False) leak: https://foss.heptapod.net/pypy/pypy/-/issues/2444
 def test_to_python():
     mat = m.Matrix(5, 4)
     assert memoryview(mat).shape == (5, 4)
@@ -50,8 +46,8 @@
     mat[3, 2] = 7.0
     assert mat[2, 3] == 4
     assert mat[3, 2] == 7
-    assert struct.unpack_from('f', mat, (3 * 4 + 2) * 4) == (7, )
-    assert struct.unpack_from('f', mat, (2 * 4 + 3) * 4) == (4, )
+    assert struct.unpack_from("f", mat, (3 * 4 + 2) * 4) == (7,)
+    assert struct.unpack_from("f", mat, (2 * 4 + 3) * 4) == (4,)
 
     mat2 = np.array(mat, copy=False)
     assert mat2.shape == (5, 4)
@@ -75,7 +71,6 @@
     assert cstats.move_assignments == 0
 
 
-@pytest.unsupported_on_pypy
 def test_inherited_protocol():
     """SquareMatrix is derived from Matrix and inherits the buffer protocol"""
 
@@ -84,35 +79,84 @@
     assert np.asarray(matrix).shape == (5, 5)
 
 
-@pytest.unsupported_on_pypy
 def test_pointer_to_member_fn():
     for cls in [m.Buffer, m.ConstBuffer, m.DerivedBuffer]:
         buf = cls()
         buf.value = 0x12345678
-        value = struct.unpack('i', bytearray(buf))[0]
+        value = struct.unpack("i", bytearray(buf))[0]
         assert value == 0x12345678
 
 
-@pytest.unsupported_on_pypy
 def test_readonly_buffer():
     buf = m.BufferReadOnly(0x64)
     view = memoryview(buf)
-    assert view[0] == 0x64 if PY3 else b'd'
+    assert view[0] == (b"d" if env.PY2 else 0x64)  # ternary picks the expected value
     assert view.readonly
 
 
-@pytest.unsupported_on_pypy
 def test_selective_readonly_buffer():
     buf = m.BufferReadOnlySelect()
 
-    memoryview(buf)[0] = 0x64 if PY3 else b'd'
+    memoryview(buf)[0] = b"d" if env.PY2 else 0x64
     assert buf.value == 0x64
 
-    io.BytesIO(b'A').readinto(buf)
-    assert buf.value == ord(b'A')
+    io.BytesIO(b"A").readinto(buf)
+    assert buf.value == ord(b"A")
 
     buf.readonly = True
     with pytest.raises(TypeError):
-        memoryview(buf)[0] = 0 if PY3 else b'\0'
+        memoryview(buf)[0] = b"\0" if env.PY2 else 0
     with pytest.raises(TypeError):
-        io.BytesIO(b'1').readinto(buf)
+        io.BytesIO(b"1").readinto(buf)
+
+
+def test_ctypes_array_1d():
+    char1d = (ctypes.c_char * 10)()
+    int1d = (ctypes.c_int * 15)()
+    long1d = (ctypes.c_long * 7)()
+
+    for carray in (char1d, int1d, long1d):
+        info = m.get_buffer_info(carray)
+        assert info.itemsize == ctypes.sizeof(carray._type_)
+        assert info.size == len(carray)
+        assert info.ndim == 1
+        assert info.shape == [info.size]
+        assert info.strides == [info.itemsize]
+        assert not info.readonly
+
+
+def test_ctypes_array_2d():
+    char2d = ((ctypes.c_char * 10) * 4)()
+    int2d = ((ctypes.c_int * 15) * 3)()
+    long2d = ((ctypes.c_long * 7) * 2)()
+
+    for carray in (char2d, int2d, long2d):
+        info = m.get_buffer_info(carray)
+        assert info.itemsize == ctypes.sizeof(carray[0]._type_)
+        assert info.size == len(carray) * len(carray[0])
+        assert info.ndim == 2
+        assert info.shape == [len(carray), len(carray[0])]
+        assert info.strides == [info.itemsize * len(carray[0]), info.itemsize]
+        assert not info.readonly
+
+
+@pytest.mark.skipif(
+    "env.PYPY and env.PY2", reason="PyPy2 bytes buffer not reported as readonly"
+)
+def test_ctypes_from_buffer():
+    test_pystr = b"0123456789"
+    for pyarray in (test_pystr, bytearray(test_pystr)):
+        pyinfo = m.get_buffer_info(pyarray)
+
+        if pyinfo.readonly:
+            cbytes = (ctypes.c_char * len(pyarray)).from_buffer_copy(pyarray)
+            cinfo = m.get_buffer_info(cbytes)
+        else:
+            cbytes = (ctypes.c_char * len(pyarray)).from_buffer(pyarray)
+            cinfo = m.get_buffer_info(cbytes)
+
+        assert cinfo.size == pyinfo.size
+        assert cinfo.ndim == pyinfo.ndim
+        assert cinfo.shape == pyinfo.shape
+        assert cinfo.strides == pyinfo.strides
+        assert not cinfo.readonly
diff --git a/tests/test_builtin_casters.cpp b/tests/test_builtin_casters.cpp
index acb2446..acc9f8f 100644
--- a/tests/test_builtin_casters.cpp
+++ b/tests/test_builtin_casters.cpp
@@ -117,12 +117,16 @@
         return std::make_pair(RValueCaster{}, std::make_tuple(RValueCaster{}, std::make_pair(RValueCaster{}, RValueCaster{}))); });
     m.def("lvalue_nested", []() -> const decltype(lvnested) & { return lvnested; });
 
+    static std::pair<int, std::string> int_string_pair{2, "items"};
+    m.def("int_string_pair", []() { return &int_string_pair; });
+
     // test_builtins_cast_return_none
     m.def("return_none_string", []() -> std::string * { return nullptr; });
     m.def("return_none_char",   []() -> const char *  { return nullptr; });
     m.def("return_none_bool",   []() -> bool *        { return nullptr; });
     m.def("return_none_int",    []() -> int *         { return nullptr; });
     m.def("return_none_float",  []() -> float *       { return nullptr; });
+    m.def("return_none_pair",   []() -> std::pair<int,int> * { return nullptr; });
 
     // test_none_deferred
     m.def("defer_none_cstring", [](char *) { return false; });
diff --git a/tests/test_builtin_casters.py b/tests/test_builtin_casters.py
index 9142258..bd7996b 100644
--- a/tests/test_builtin_casters.py
+++ b/tests/test_builtin_casters.py
@@ -1,6 +1,8 @@
-# Python < 3 needs this: coding=utf-8
+# -*- coding: utf-8 -*-
 import pytest
 
+import env  # noqa: F401
+
 from pybind11_tests import builtin_casters as m
 from pybind11_tests import UserType, IncType
 
@@ -35,79 +37,85 @@
         with pytest.raises(UnicodeDecodeError):
             m.bad_utf8_u8string()
 
-    assert m.u8_Z() == 'Z'
-    assert m.u8_eacute() == u'é'
-    assert m.u16_ibang() == u'‽'
-    assert m.u32_mathbfA() == u'𝐀'
-    assert m.wchar_heart() == u'♥'
+    assert m.u8_Z() == "Z"
+    assert m.u8_eacute() == u"é"
+    assert m.u16_ibang() == u"‽"
+    assert m.u32_mathbfA() == u"𝐀"
+    assert m.wchar_heart() == u"♥"
     if hasattr(m, "has_u8string"):
-        assert m.u8_char8_Z() == 'Z'
+        assert m.u8_char8_Z() == "Z"
 
 
 def test_single_char_arguments():
     """Tests failures for passing invalid inputs to char-accepting functions"""
+
     def toobig_message(r):
         return "Character code point not in range({0:#x})".format(r)
+
     toolong_message = "Expected a character, but multi-character string found"
 
-    assert m.ord_char(u'a') == 0x61  # simple ASCII
-    assert m.ord_char_lv(u'b') == 0x62
-    assert m.ord_char(u'é') == 0xE9  # requires 2 bytes in utf-8, but can be stuffed in a char
+    assert m.ord_char(u"a") == 0x61  # simple ASCII
+    assert m.ord_char_lv(u"b") == 0x62
+    assert (
+        m.ord_char(u"é") == 0xE9
+    )  # requires 2 bytes in utf-8, but can be stuffed in a char
     with pytest.raises(ValueError) as excinfo:
-        assert m.ord_char(u'Ā') == 0x100  # requires 2 bytes, doesn't fit in a char
+        assert m.ord_char(u"Ā") == 0x100  # requires 2 bytes, doesn't fit in a char
     assert str(excinfo.value) == toobig_message(0x100)
     with pytest.raises(ValueError) as excinfo:
-        assert m.ord_char(u'ab')
+        assert m.ord_char(u"ab")
     assert str(excinfo.value) == toolong_message
 
-    assert m.ord_char16(u'a') == 0x61
-    assert m.ord_char16(u'é') == 0xE9
-    assert m.ord_char16_lv(u'ê') == 0xEA
-    assert m.ord_char16(u'Ā') == 0x100
-    assert m.ord_char16(u'‽') == 0x203d
-    assert m.ord_char16(u'♥') == 0x2665
-    assert m.ord_char16_lv(u'♡') == 0x2661
+    assert m.ord_char16(u"a") == 0x61
+    assert m.ord_char16(u"é") == 0xE9
+    assert m.ord_char16_lv(u"ê") == 0xEA
+    assert m.ord_char16(u"Ā") == 0x100
+    assert m.ord_char16(u"‽") == 0x203D
+    assert m.ord_char16(u"♥") == 0x2665
+    assert m.ord_char16_lv(u"♡") == 0x2661
     with pytest.raises(ValueError) as excinfo:
-        assert m.ord_char16(u'🎂') == 0x1F382  # requires surrogate pair
+        assert m.ord_char16(u"🎂") == 0x1F382  # requires surrogate pair
     assert str(excinfo.value) == toobig_message(0x10000)
     with pytest.raises(ValueError) as excinfo:
-        assert m.ord_char16(u'aa')
+        assert m.ord_char16(u"aa")
     assert str(excinfo.value) == toolong_message
 
-    assert m.ord_char32(u'a') == 0x61
-    assert m.ord_char32(u'é') == 0xE9
-    assert m.ord_char32(u'Ā') == 0x100
-    assert m.ord_char32(u'‽') == 0x203d
-    assert m.ord_char32(u'♥') == 0x2665
-    assert m.ord_char32(u'🎂') == 0x1F382
+    assert m.ord_char32(u"a") == 0x61
+    assert m.ord_char32(u"é") == 0xE9
+    assert m.ord_char32(u"Ā") == 0x100
+    assert m.ord_char32(u"‽") == 0x203D
+    assert m.ord_char32(u"♥") == 0x2665
+    assert m.ord_char32(u"🎂") == 0x1F382
     with pytest.raises(ValueError) as excinfo:
-        assert m.ord_char32(u'aa')
+        assert m.ord_char32(u"aa")
     assert str(excinfo.value) == toolong_message
 
-    assert m.ord_wchar(u'a') == 0x61
-    assert m.ord_wchar(u'é') == 0xE9
-    assert m.ord_wchar(u'Ā') == 0x100
-    assert m.ord_wchar(u'‽') == 0x203d
-    assert m.ord_wchar(u'♥') == 0x2665
+    assert m.ord_wchar(u"a") == 0x61
+    assert m.ord_wchar(u"é") == 0xE9
+    assert m.ord_wchar(u"Ā") == 0x100
+    assert m.ord_wchar(u"‽") == 0x203D
+    assert m.ord_wchar(u"♥") == 0x2665
     if m.wchar_size == 2:
         with pytest.raises(ValueError) as excinfo:
-            assert m.ord_wchar(u'🎂') == 0x1F382  # requires surrogate pair
+            assert m.ord_wchar(u"🎂") == 0x1F382  # requires surrogate pair
         assert str(excinfo.value) == toobig_message(0x10000)
     else:
-        assert m.ord_wchar(u'🎂') == 0x1F382
+        assert m.ord_wchar(u"🎂") == 0x1F382
     with pytest.raises(ValueError) as excinfo:
-        assert m.ord_wchar(u'aa')
+        assert m.ord_wchar(u"aa")
     assert str(excinfo.value) == toolong_message
 
     if hasattr(m, "has_u8string"):
-        assert m.ord_char8(u'a') == 0x61  # simple ASCII
-        assert m.ord_char8_lv(u'b') == 0x62
-        assert m.ord_char8(u'é') == 0xE9  # requires 2 bytes in utf-8, but can be stuffed in a char
+        assert m.ord_char8(u"a") == 0x61  # simple ASCII
+        assert m.ord_char8_lv(u"b") == 0x62
+        assert (
+            m.ord_char8(u"é") == 0xE9
+        )  # requires 2 bytes in utf-8, but can be stuffed in a char
         with pytest.raises(ValueError) as excinfo:
-            assert m.ord_char8(u'Ā') == 0x100  # requires 2 bytes, doesn't fit in a char
+            assert m.ord_char8(u"Ā") == 0x100  # requires 2 bytes, doesn't fit in a char
         assert str(excinfo.value) == toobig_message(0x100)
         with pytest.raises(ValueError) as excinfo:
-            assert m.ord_char8(u'ab')
+            assert m.ord_char8(u"ab")
         assert str(excinfo.value) == toolong_message
 
 
@@ -115,88 +123,108 @@
     """Tests the ability to pass bytes to C++ string-accepting functions.  Note that this is
     one-way: the only way to return bytes to Python is via the pybind11::bytes class."""
     # Issue #816
-    import sys
-    byte = bytes if sys.version_info[0] < 3 else str
 
-    assert m.strlen(byte("hi")) == 2
-    assert m.string_length(byte("world")) == 5
-    assert m.string_length(byte("a\x00b")) == 3
-    assert m.strlen(byte("a\x00b")) == 1  # C-string limitation
+    def to_bytes(s):
+        b = s if env.PY2 else s.encode("utf8")
+        assert isinstance(b, bytes)
+        return b
+
+    assert m.strlen(to_bytes("hi")) == 2
+    assert m.string_length(to_bytes("world")) == 5
+    assert m.string_length(to_bytes("a\x00b")) == 3
+    assert m.strlen(to_bytes("a\x00b")) == 1  # C-string limitation
 
     # passing in a utf8 encoded string should work
-    assert m.string_length(u'💩'.encode("utf8")) == 4
+    assert m.string_length(u"💩".encode("utf8")) == 4
 
 
 @pytest.mark.skipif(not hasattr(m, "has_string_view"), reason="no <string_view>")
 def test_string_view(capture):
     """Tests support for C++17 string_view arguments and return values"""
     assert m.string_view_chars("Hi") == [72, 105]
-    assert m.string_view_chars("Hi 🎂") == [72, 105, 32, 0xf0, 0x9f, 0x8e, 0x82]
-    assert m.string_view16_chars("Hi 🎂") == [72, 105, 32, 0xd83c, 0xdf82]
-    assert m.string_view32_chars("Hi 🎂") == [72, 105, 32, 127874]
+    assert m.string_view_chars("Hi 🎂") == [72, 105, 32, 0xF0, 0x9F, 0x8E, 0x82]
+    assert m.string_view16_chars(u"Hi 🎂") == [72, 105, 32, 0xD83C, 0xDF82]
+    assert m.string_view32_chars(u"Hi 🎂") == [72, 105, 32, 127874]
     if hasattr(m, "has_u8string"):
         assert m.string_view8_chars("Hi") == [72, 105]
-        assert m.string_view8_chars("Hi 🎂") == [72, 105, 32, 0xf0, 0x9f, 0x8e, 0x82]
+        assert m.string_view8_chars(u"Hi 🎂") == [72, 105, 32, 0xF0, 0x9F, 0x8E, 0x82]
 
-    assert m.string_view_return() == "utf8 secret 🎂"
-    assert m.string_view16_return() == "utf16 secret 🎂"
-    assert m.string_view32_return() == "utf32 secret 🎂"
+    assert m.string_view_return() == u"utf8 secret 🎂"
+    assert m.string_view16_return() == u"utf16 secret 🎂"
+    assert m.string_view32_return() == u"utf32 secret 🎂"
     if hasattr(m, "has_u8string"):
-        assert m.string_view8_return() == "utf8 secret 🎂"
+        assert m.string_view8_return() == u"utf8 secret 🎂"
 
     with capture:
         m.string_view_print("Hi")
         m.string_view_print("utf8 🎂")
-        m.string_view16_print("utf16 🎂")
-        m.string_view32_print("utf32 🎂")
-    assert capture == """
+        m.string_view16_print(u"utf16 🎂")
+        m.string_view32_print(u"utf32 🎂")
+    assert (
+        capture
+        == u"""
         Hi 2
         utf8 🎂 9
         utf16 🎂 8
         utf32 🎂 7
     """
+    )
     if hasattr(m, "has_u8string"):
         with capture:
             m.string_view8_print("Hi")
-            m.string_view8_print("utf8 🎂")
-        assert capture == """
+            m.string_view8_print(u"utf8 🎂")
+        assert (
+            capture
+            == u"""
             Hi 2
             utf8 🎂 9
         """
+        )
 
     with capture:
         m.string_view_print("Hi, ascii")
         m.string_view_print("Hi, utf8 🎂")
-        m.string_view16_print("Hi, utf16 🎂")
-        m.string_view32_print("Hi, utf32 🎂")
-    assert capture == """
+        m.string_view16_print(u"Hi, utf16 🎂")
+        m.string_view32_print(u"Hi, utf32 🎂")
+    assert (
+        capture
+        == u"""
         Hi, ascii 9
         Hi, utf8 🎂 13
         Hi, utf16 🎂 12
         Hi, utf32 🎂 11
     """
+    )
     if hasattr(m, "has_u8string"):
         with capture:
             m.string_view8_print("Hi, ascii")
-            m.string_view8_print("Hi, utf8 🎂")
-        assert capture == """
+            m.string_view8_print(u"Hi, utf8 🎂")
+        assert (
+            capture
+            == u"""
             Hi, ascii 9
             Hi, utf8 🎂 13
         """
+        )
 
 
 def test_integer_casting():
     """Issue #929 - out-of-range integer values shouldn't be accepted"""
-    import sys
     assert m.i32_str(-1) == "-1"
     assert m.i64_str(-1) == "-1"
     assert m.i32_str(2000000000) == "2000000000"
     assert m.u32_str(2000000000) == "2000000000"
-    if sys.version_info < (3,):
+    if env.PY2:
         assert m.i32_str(long(-1)) == "-1"  # noqa: F821 undefined name 'long'
         assert m.i64_str(long(-1)) == "-1"  # noqa: F821 undefined name 'long'
-        assert m.i64_str(long(-999999999999)) == "-999999999999"  # noqa: F821 undefined name
-        assert m.u64_str(long(999999999999)) == "999999999999"  # noqa: F821 undefined name 'long'
+        assert (
+            m.i64_str(long(-999999999999))  # noqa: F821 undefined name 'long'
+            == "-999999999999"
+        )
+        assert (
+            m.u64_str(long(999999999999))  # noqa: F821 undefined name 'long'
+            == "999999999999"
+        )
     else:
         assert m.i64_str(-999999999999) == "-999999999999"
         assert m.u64_str(999999999999) == "999999999999"
@@ -214,7 +242,7 @@
         m.i32_str(3000000000)
     assert "incompatible function arguments" in str(excinfo.value)
 
-    if sys.version_info < (3,):
+    if env.PY2:
         with pytest.raises(TypeError) as excinfo:
             m.u32_str(long(-1))  # noqa: F821 undefined name 'long'
         assert "incompatible function arguments" in str(excinfo.value)
@@ -232,16 +260,22 @@
     assert m.tuple_passthrough([True, "test", 5]) == (5, "test", True)
     assert m.empty_tuple() == ()
 
-    assert doc(m.pair_passthrough) == """
+    assert (
+        doc(m.pair_passthrough)
+        == """
         pair_passthrough(arg0: Tuple[bool, str]) -> Tuple[str, bool]
 
         Return a pair in reversed order
     """
-    assert doc(m.tuple_passthrough) == """
+    )
+    assert (
+        doc(m.tuple_passthrough)
+        == """
         tuple_passthrough(arg0: Tuple[bool, str, int]) -> Tuple[int, str, bool]
 
         Return a triple in reversed order
     """
+    )
 
     assert m.rvalue_pair() == ("rvalue", "rvalue")
     assert m.lvalue_pair() == ("lvalue", "lvalue")
@@ -250,6 +284,8 @@
     assert m.rvalue_nested() == ("rvalue", ("rvalue", ("rvalue", "rvalue")))
     assert m.lvalue_nested() == ("lvalue", ("lvalue", ("lvalue", "lvalue")))
 
+    assert m.int_string_pair() == (2, "items")
+
 
 def test_builtins_cast_return_none():
     """Casters produced with PYBIND11_TYPE_CASTER() should convert nullptr to None"""
@@ -258,6 +294,7 @@
     assert m.return_none_bool() is None
     assert m.return_none_int() is None
     assert m.return_none_float() is None
+    assert m.return_none_pair() is None
 
 
 def test_none_deferred():
@@ -352,9 +389,9 @@
     assert convert(A(False)) is False
 
 
-@pytest.requires_numpy
 def test_numpy_bool():
-    import numpy as np
+    np = pytest.importorskip("numpy")
+
     convert, noconvert = m.bool_passthrough, m.bool_passthrough_noconvert
 
     def cant_convert(v):
@@ -365,7 +402,7 @@
     assert convert(np.bool_(False)) is False
     assert noconvert(np.bool_(True)) is True
     assert noconvert(np.bool_(False)) is False
-    cant_convert(np.zeros(2, dtype='int'))
+    cant_convert(np.zeros(2, dtype="int"))
 
 
 def test_int_long():
@@ -375,7 +412,8 @@
     long."""
 
     import sys
-    must_be_long = type(getattr(sys, 'maxint', 1) + 1)
+
+    must_be_long = type(getattr(sys, "maxint", 1) + 1)
     assert isinstance(m.int_cast(), int)
     assert isinstance(m.long_cast(), int)
     assert isinstance(m.longlong_cast(), must_be_long)
diff --git a/tests/test_call_policies.cpp b/tests/test_call_policies.cpp
index fd24557..26c83f8 100644
--- a/tests/test_call_policies.cpp
+++ b/tests/test_call_policies.cpp
@@ -46,6 +46,7 @@
     class Parent {
     public:
         Parent() { py::print("Allocating parent."); }
+        Parent(const Parent& parent) = default;
         ~Parent() { py::print("Releasing parent."); }
         void addChild(Child *) { }
         Child *returnChild() { return new Child(); }
diff --git a/tests/test_call_policies.py b/tests/test_call_policies.py
index 7c83559..e0413d1 100644
--- a/tests/test_call_policies.py
+++ b/tests/test_call_policies.py
@@ -1,8 +1,13 @@
+# -*- coding: utf-8 -*-
 import pytest
+
+import env  # noqa: F401
+
 from pybind11_tests import call_policies as m
 from pybind11_tests import ConstructorStats
 
 
+@pytest.mark.xfail("env.PYPY", reason="sometimes comes out 1 off on PyPy", strict=False)
 def test_keep_alive_argument(capture):
     n_inst = ConstructorStats.detail_reg_inst()
     with capture:
@@ -11,10 +16,13 @@
     with capture:
         p.addChild(m.Child())
         assert ConstructorStats.detail_reg_inst() == n_inst + 1
-    assert capture == """
+    assert (
+        capture
+        == """
         Allocating child.
         Releasing child.
     """
+    )
     with capture:
         del p
         assert ConstructorStats.detail_reg_inst() == n_inst
@@ -30,10 +38,13 @@
     with capture:
         del p
         assert ConstructorStats.detail_reg_inst() == n_inst
-    assert capture == """
+    assert (
+        capture
+        == """
         Releasing parent.
         Releasing child.
     """
+    )
 
 
 def test_keep_alive_return_value(capture):
@@ -44,10 +55,13 @@
     with capture:
         p.returnChild()
         assert ConstructorStats.detail_reg_inst() == n_inst + 1
-    assert capture == """
+    assert (
+        capture
+        == """
         Allocating child.
         Releasing child.
     """
+    )
     with capture:
         del p
         assert ConstructorStats.detail_reg_inst() == n_inst
@@ -63,28 +77,34 @@
     with capture:
         del p
         assert ConstructorStats.detail_reg_inst() == n_inst
-    assert capture == """
+    assert (
+        capture
+        == """
         Releasing parent.
         Releasing child.
     """
+    )
 
 
-# https://bitbucket.org/pypy/pypy/issues/2447
-@pytest.unsupported_on_pypy
+# https://foss.heptapod.net/pypy/pypy/-/issues/2447
+@pytest.mark.xfail("env.PYPY", reason="_PyObject_GetDictPtr is unimplemented")
 def test_alive_gc(capture):
     n_inst = ConstructorStats.detail_reg_inst()
     p = m.ParentGC()
     p.addChildKeepAlive(m.Child())
     assert ConstructorStats.detail_reg_inst() == n_inst + 2
     lst = [p]
-    lst.append(lst)   # creates a circular reference
+    lst.append(lst)  # creates a circular reference
     with capture:
         del p, lst
         assert ConstructorStats.detail_reg_inst() == n_inst
-    assert capture == """
+    assert (
+        capture
+        == """
         Releasing parent.
         Releasing child.
     """
+    )
 
 
 def test_alive_gc_derived(capture):
@@ -96,14 +116,17 @@
     p.addChildKeepAlive(m.Child())
     assert ConstructorStats.detail_reg_inst() == n_inst + 2
     lst = [p]
-    lst.append(lst)   # creates a circular reference
+    lst.append(lst)  # creates a circular reference
     with capture:
         del p, lst
         assert ConstructorStats.detail_reg_inst() == n_inst
-    assert capture == """
+    assert (
+        capture
+        == """
         Releasing parent.
         Releasing child.
     """
+    )
 
 
 def test_alive_gc_multi_derived(capture):
@@ -118,15 +141,18 @@
     # +3 rather than +2 because Derived corresponds to two registered instances
     assert ConstructorStats.detail_reg_inst() == n_inst + 3
     lst = [p]
-    lst.append(lst)   # creates a circular reference
+    lst.append(lst)  # creates a circular reference
     with capture:
         del p, lst
         assert ConstructorStats.detail_reg_inst() == n_inst
-    assert capture == """
+    assert (
+        capture
+        == """
         Releasing parent.
         Releasing child.
         Releasing child.
     """
+    )
 
 
 def test_return_none(capture):
@@ -162,17 +188,23 @@
     with capture:
         p = m.Parent(m.Child())
         assert ConstructorStats.detail_reg_inst() == n_inst + 2
-    assert capture == """
+    assert (
+        capture
+        == """
         Allocating child.
         Allocating parent.
     """
+    )
     with capture:
         del p
         assert ConstructorStats.detail_reg_inst() == n_inst
-    assert capture == """
+    assert (
+        capture
+        == """
         Releasing parent.
         Releasing child.
     """
+    )
 
 
 def test_call_guard():
diff --git a/tests/test_callbacks.cpp b/tests/test_callbacks.cpp
index 71b88c4..683dfb3 100644
--- a/tests/test_callbacks.cpp
+++ b/tests/test_callbacks.cpp
@@ -117,7 +117,11 @@
         }
     });
 
-    class AbstractBase { public: virtual unsigned int func() = 0; };
+    class AbstractBase {
+    public:
+        virtual ~AbstractBase() = default;
+        virtual unsigned int func() = 0;
+    };
     m.def("func_accepting_func_accepting_base", [](std::function<double(AbstractBase&)>) { });
 
     struct MovableObject {
diff --git a/tests/test_callbacks.py b/tests/test_callbacks.py
index 6439c8e..039b877 100644
--- a/tests/test_callbacks.py
+++ b/tests/test_callbacks.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 import pytest
 from pybind11_tests import callbacks as m
 from threading import Thread
@@ -41,17 +42,19 @@
 
 
 def test_keyword_args_and_generalized_unpacking():
-
     def f(*args, **kwargs):
         return args, kwargs
 
     assert m.test_tuple_unpacking(f) == (("positional", 1, 2, 3, 4, 5, 6), {})
-    assert m.test_dict_unpacking(f) == (("positional", 1), {"key": "value", "a": 1, "b": 2})
+    assert m.test_dict_unpacking(f) == (
+        ("positional", 1),
+        {"key": "value", "a": 1, "b": 2},
+    )
     assert m.test_keyword_args(f) == ((), {"x": 10, "y": 20})
     assert m.test_unpacking_and_keywords1(f) == ((1, 2), {"c": 3, "d": 4})
     assert m.test_unpacking_and_keywords2(f) == (
         ("positional", 1, 2, 3, 4, 5),
-        {"key": "value", "a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
+        {"key": "value", "a": 1, "b": 2, "c": 3, "d": 4, "e": 5},
     )
 
     with pytest.raises(TypeError) as excinfo:
@@ -82,12 +85,18 @@