gh-98586: Add vector call APIs to the Limited API (GH-98587)

Expose the facilities for making vector calls through Python's limited API.
diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
index fde62ea..1336584 100644
--- a/Doc/data/stable_abi.dat
+++ b/Doc/data/stable_abi.dat
@@ -1,4 +1,5 @@
 role,name,added,ifdef_note,struct_abi_kind
+macro,PY_VECTORCALL_ARGUMENTS_OFFSET,3.12,,
 function,PyAIter_Check,3.10,,
 function,PyArg_Parse,3.2,,
 function,PyArg_ParseTuple,3.2,,
@@ -536,6 +537,8 @@
 function,PyObject_Size,3.2,,
 function,PyObject_Str,3.2,,
 function,PyObject_Type,3.2,,
+function,PyObject_Vectorcall,3.12,,
+function,PyObject_VectorcallMethod,3.12,,
 var,PyProperty_Type,3.2,,
 var,PyRangeIter_Type,3.2,,
 var,PyRange_Type,3.2,,
diff --git a/Include/abstract.h b/Include/abstract.h
index 784ff7e..064b030 100644
--- a/Include/abstract.h
+++ b/Include/abstract.h
@@ -238,6 +238,22 @@ PyAPI_FUNC(Py_ssize_t) PyVectorcall_NARGS(size_t nargsf);
    "tuple" and keyword arguments "dict". "dict" may also be NULL */
 PyAPI_FUNC(PyObject *) PyVectorcall_Call(PyObject *callable, PyObject *tuple, PyObject *dict);
 
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030C0000
+#define PY_VECTORCALL_ARGUMENTS_OFFSET \
+    (_Py_STATIC_CAST(size_t, 1) << (8 * sizeof(size_t) - 1))
+
+/* Perform a PEP 590-style vector call on 'callable' */
+PyAPI_FUNC(PyObject *) PyObject_Vectorcall(
+    PyObject *callable,
+    PyObject *const *args,
+    size_t nargsf,
+    PyObject *kwnames);
+
+/* Call method 'name' on args[0] with args[1..PyVectorcall_NARGS(nargsf)-1]. */
+PyAPI_FUNC(PyObject *) PyObject_VectorcallMethod(
+    PyObject *name, PyObject *const *args,
+    size_t nargsf, PyObject *kwnames);
+#endif
 
 /* Implemented elsewhere:
 
diff --git a/Include/cpython/abstract.h b/Include/cpython/abstract.h
index 6da29cd..3b27aab 100644
--- a/Include/cpython/abstract.h
+++ b/Include/cpython/abstract.h
@@ -50,9 +50,6 @@ PyAPI_FUNC(PyObject *) _PyObject_MakeTpCall(
     PyObject *const *args, Py_ssize_t nargs,
     PyObject *keywords);
 
-#define PY_VECTORCALL_ARGUMENTS_OFFSET \
-    (_Py_STATIC_CAST(size_t, 1) << (8 * sizeof(size_t) - 1))
-
 // PyVectorcall_NARGS() is exported as a function for the stable ABI.
 // Here (when we are not using the stable ABI), the name is overridden to
 // call a static inline function for best performance.
@@ -65,12 +62,6 @@ _PyVectorcall_NARGS(size_t n)
 
 PyAPI_FUNC(vectorcallfunc) PyVectorcall_Function(PyObject *callable);
 
-PyAPI_FUNC(PyObject *) PyObject_Vectorcall(
-    PyObject *callable,
-    PyObject *const *args,
-    size_t nargsf,
-    PyObject *kwnames);
-
 // Backwards compatibility aliases for API that was provisional in Python 3.8
 #define _PyObject_Vectorcall PyObject_Vectorcall
 #define _PyObject_VectorcallMethod PyObject_VectorcallMethod
@@ -96,10 +87,6 @@ PyAPI_FUNC(PyObject *) _PyObject_FastCall(
 
 PyAPI_FUNC(PyObject *) PyObject_CallOneArg(PyObject *func, PyObject *arg);
 
-PyAPI_FUNC(PyObject *) PyObject_VectorcallMethod(
-    PyObject *name, PyObject *const *args,
-    size_t nargsf, PyObject *kwnames);
-
 static inline PyObject *
 PyObject_CallMethodNoArgs(PyObject *self, PyObject *name)
 {
diff --git a/Lib/test/test_call.py b/Lib/test/test_call.py
index 0b37116..d4ddb79 100644
--- a/Lib/test/test_call.py
+++ b/Lib/test/test_call.py
@@ -812,11 +812,43 @@ def get_a(x):
             assert_equal("overridden", get_a(x))
 
     @requires_limited_api
-    def test_vectorcall_limited(self):
+    def test_vectorcall_limited_incoming(self):
         from _testcapi import pyobject_vectorcall
         obj = _testcapi.LimitedVectorCallClass()
         self.assertEqual(pyobject_vectorcall(obj, (), ()), "vectorcall called")
 
+    @requires_limited_api
+    def test_vectorcall_limited_outgoing(self):
+        from _testcapi import call_vectorcall
+        # call_vectorcall() is a C helper; it should call f("foo", baz="bar").
+        args_captured = []
+        kwargs_captured = []
+
+        def f(*args, **kwargs):
+            args_captured.append(args)
+            kwargs_captured.append(kwargs)
+            return "success"
+
+        self.assertEqual(call_vectorcall(f), "success")
+        self.assertEqual(args_captured, [("foo",)])
+        self.assertEqual(kwargs_captured, [{"baz": "bar"}])
+
+    @requires_limited_api
+    def test_vectorcall_limited_outgoing_method(self):
+        from _testcapi import call_vectorcall_method
+        # The C helper should call obj.f("foo", baz="bar") on the object given.
+        args_captured = []
+        kwargs_captured = []
+
+        class TestInstance:
+            def f(self, *args, **kwargs):
+                args_captured.append(args)
+                kwargs_captured.append(kwargs)
+                return "success"
+
+        self.assertEqual(call_vectorcall_method(TestInstance()), "success")
+        self.assertEqual(args_captured, [("foo",)])
+        self.assertEqual(kwargs_captured, [{"baz": "bar"}])
 
 class A:
     def method_two_args(self, x, y):
diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py
index a803e3a..67c6534 100644
--- a/Lib/test/test_stable_abi_ctypes.py
+++ b/Lib/test/test_stable_abi_ctypes.py
@@ -547,6 +547,8 @@
     "PyObject_Size",
     "PyObject_Str",
     "PyObject_Type",
+    "PyObject_Vectorcall",
+    "PyObject_VectorcallMethod",
     "PyProperty_Type",
     "PyRangeIter_Type",
     "PyRange_Type",
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-24-10-30-30.gh-issue-98586.Tha5Iy.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-24-10-30-30.gh-issue-98586.Tha5Iy.rst
new file mode 100644
index 0000000..5d7b0c8
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-24-10-30-30.gh-issue-98586.Tha5Iy.rst
@@ -0,0 +1,7 @@
+Added the functions :c:func:`PyObject_Vectorcall` and
+:c:func:`PyObject_VectorcallMethod` to the :ref:`Limited API <stable>` along
+with the auxiliary macro constant :c:macro:`PY_VECTORCALL_ARGUMENTS_OFFSET`.
+
+The availability of these functions enables more efficient :PEP:`590` vector
+calls from binary extension modules that avoid argument boxing/unboxing
+overheads.
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index e78646f..e18a6e8 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2293,3 +2293,9 @@
     added = '3.12'
 [typedef.vectorcallfunc]
     added = '3.12'
+[function.PyObject_Vectorcall]
+    added = '3.12'
+[function.PyObject_VectorcallMethod]
+    added = '3.12'
+[macro.PY_VECTORCALL_ARGUMENTS_OFFSET]
+    added = '3.12'
diff --git a/Modules/_testcapi/vectorcall_limited.c b/Modules/_testcapi/vectorcall_limited.c
index ee57af8..a69f1d3 100644
--- a/Modules/_testcapi/vectorcall_limited.c
+++ b/Modules/_testcapi/vectorcall_limited.c
@@ -32,6 +32,105 @@ LimitedVectorCallClass_new(PyTypeObject *tp, PyTypeObject *a, PyTypeObject *kw)
     return self;
 }
 
+/* Test helper: call 'callable' as callable("foo", baz="bar") through
+ * PyObject_Vectorcall().  Returns its result, or NULL if setup fails. */
+static PyObject *
+call_vectorcall(PyObject* self, PyObject *callable)
+{
+    PyObject *args[3] = { NULL, NULL, NULL };
+    PyObject *kwname = NULL, *kwnames = NULL, *result = NULL;
+
+    args[1] = PyUnicode_FromString("foo");
+    if (!args[1]) {
+        goto leave;
+    }
+
+    args[2] = PyUnicode_FromString("bar");
+    if (!args[2]) {
+        goto leave;
+    }
+
+    kwnames = PyTuple_New(1);
+    if (!kwnames) {
+        goto leave;
+    }
+
+    /* Create the keyword name only after the tuple exists: its reference is
+     * handed to PyTuple_SetItem() right away, so no error path can leak it. */
+    kwname = PyUnicode_InternFromString("baz");
+    if (!kwname) {
+        goto leave;
+    }
+
+    if (PyTuple_SetItem(kwnames, 0, kwname)) {
+        goto leave;
+    }
+
+    result = PyObject_Vectorcall(
+        callable, args + 1, 1 | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames);
+
+leave:
+    Py_XDECREF(args[1]);
+    Py_XDECREF(args[2]);
+    Py_XDECREF(kwnames);
+
+    return result;
+}
+
+/* Test helper: call callable.f("foo", baz="bar") through the limited-API
+ * PyObject_VectorcallMethod().  'callable' is the receiver, passed as args[0].
+ * Returns the call's result, or NULL if building the arguments fails. */
+static PyObject *
+call_vectorcall_method(PyObject* self, PyObject *callable)
+{
+    PyObject *args[3] = { NULL, NULL, NULL };
+    PyObject *name = NULL, *kwname = NULL,
+             *kwnames = NULL, *result = NULL;
+
+    name = PyUnicode_FromString("f");
+    if (!name) {
+        goto leave;
+    }
+
+    args[0] = callable;  /* borrowed: deliberately not released in 'leave' */
+    args[1] = PyUnicode_FromString("foo");
+    if (!args[1]) {
+        goto leave;
+    }
+
+    args[2] = PyUnicode_FromString("bar");
+    if (!args[2]) {
+        goto leave;
+    }
+
+    kwnames = PyTuple_New(1);
+    if (!kwnames) {
+        goto leave;
+    }
+
+    /* Create the keyword name only after the tuple exists: its reference is
+     * handed to PyTuple_SetItem() right away, so no error path can leak it. */
+    kwname = PyUnicode_InternFromString("baz");
+    if (!kwname) {
+        goto leave;
+    }
+
+    if (PyTuple_SetItem(kwnames, 0, kwname)) {
+        goto leave;
+    }
+
+    result = PyObject_VectorcallMethod(
+        name, args, 2 | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames);
+
+leave:
+    Py_XDECREF(name);
+    Py_XDECREF(args[1]);
+    Py_XDECREF(args[2]);
+    Py_XDECREF(kwnames);
+
+    return result;
+}
+
 static PyMemberDef LimitedVectorCallClass_members[] = {
     {"__vectorcalloffset__", T_PYSSIZET, sizeof(PyObject), READONLY},
     {NULL}
@@ -54,10 +153,8 @@ static PyType_Spec LimitedVectorCallClass_spec = {
 };
 
 static PyMethodDef TestMethods[] = {
-    /* Add module methods here.
-     * (Empty list left here as template/example, since using
-     * PyModule_AddFunctions isn't very common.)
-     */
+    {"call_vectorcall", call_vectorcall, METH_O},
+    {"call_vectorcall_method", call_vectorcall_method, METH_O},
     {NULL},
 };
 
diff --git a/PC/python3dll.c b/PC/python3dll.c
index c1b88c6..931f316 100755
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -485,6 +485,8 @@
 EXPORT_FUNC(PyObject_Size)
 EXPORT_FUNC(PyObject_Str)
 EXPORT_FUNC(PyObject_Type)
+EXPORT_FUNC(PyObject_Vectorcall)
+EXPORT_FUNC(PyObject_VectorcallMethod)
 EXPORT_FUNC(PyOS_CheckStack)
 EXPORT_FUNC(PyOS_double_to_string)
 EXPORT_FUNC(PyOS_FSPath)