diff --git a/MANIFEST.in b/MANIFEST.in
index e9fdd6a..b8ca2e0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,5 @@
 recursive-include cffi *.py *.h
-recursive-include c *.c *.h *.asm *.py win64.obj
+recursive-include c *.c *.h *.asm *.py win64.obj ffi.lib
 recursive-include testing *.py *.c *.h
 recursive-include doc *.py *.rst Makefile *.bat
 recursive-include demo py.cleanup *.py embedding_test.c manual.c
diff --git a/METADATA b/METADATA
index 512e1f3..db9d268 100644
--- a/METADATA
+++ b/METADATA
@@ -11,7 +11,7 @@
     type: HG
     value: "https://bitbucket.org/cffi/cffi/src"
   }
-  version: "1.12.2"
-  last_upgrade_date { year: 2019 month: 2 day: 26 }
+  version: "1.15.0"
+  last_upgrade_date { year: 2020 month: 11 day: 8 }
   license_type: NOTICE
 }
diff --git a/README.md b/README.md
index 3e7862d..a68639e 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 Download
 --------
 
-[Download page](https://bitbucket.org/cffi/cffi/downloads)
+[Download page](https://foss.heptapod.net/pypy/cffi/-/tags)
 
 Contact
 -------
diff --git a/c/.libs_cffi_backend/libffi-45372312.so.6.0.4 b/c/.libs_cffi_backend/libffi-45372312.so.6.0.4
deleted file mode 100644
index 59e65c0..0000000
--- a/c/.libs_cffi_backend/libffi-45372312.so.6.0.4
+++ /dev/null
Binary files differ
diff --git a/c/.libs_cffi_backend/libffi-9c61262e.so.8.1.0 b/c/.libs_cffi_backend/libffi-9c61262e.so.8.1.0
new file mode 100755
index 0000000..82b4232
--- /dev/null
+++ b/c/.libs_cffi_backend/libffi-9c61262e.so.8.1.0
Binary files differ
diff --git a/c/Android.bp b/c/Android.bp
index 7ddc006..a187416 100644
--- a/c/Android.bp
+++ b/c/Android.bp
@@ -55,6 +55,6 @@
 filegroup {
     name: "py-cffi-backend-libffi-files",
     srcs: [
-        ".libs_cffi_backend/libffi-45372312.so.6.0.4",
+        ".libs_cffi_backend/libffi-9c61262e.so.8.1.0",
     ],
 }
diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c
index c39866a..ffecbf9 100644
--- a/c/_cffi_backend.c
+++ b/c/_cffi_backend.c
@@ -2,7 +2,7 @@
 #include <Python.h>
 #include "structmember.h"
 
-#define CFFI_VERSION  "1.12.2"
+#define CFFI_VERSION  "1.15.0"
 
 #ifdef MS_WIN32
 #include <windows.h>
@@ -80,17 +80,45 @@
  * That sounds like a horribly bad idea to me, and is the reason for why
  * I prefer CFFI crashing cleanly.
  *
- * Currently, we use libffi's ffi_closure_alloc() only on NetBSD.  It is
+ * Currently, we use libffi's ffi_closure_alloc() on NetBSD.  It is
  * known that on the NetBSD kernel, a different strategy is used which
  * should not be open to the fork() bug.
+ *
+ * ffi_closure_alloc() is also used on macOS, provided we are executing on
+ * macOS 10.15 or above.  That case is messy because it needs runtime checks.
  */
 #ifdef __NetBSD__
-# define CFFI_TRUST_LIBFFI
+
+# define CFFI_CHECK_FFI_CLOSURE_ALLOC 1
+# define CFFI_CHECK_FFI_CLOSURE_ALLOC_MAYBE 1
+# define CFFI_CHECK_FFI_PREP_CLOSURE_LOC 1
+# define CFFI_CHECK_FFI_PREP_CLOSURE_LOC_MAYBE 1
+# define CFFI_CHECK_FFI_PREP_CIF_VAR 0
+# define CFFI_CHECK_FFI_PREP_CIF_VAR_MAYBE 0
+
+#elif defined(__APPLE__) && defined(FFI_AVAILABLE_APPLE)
+
+# define CFFI_CHECK_FFI_CLOSURE_ALLOC __builtin_available(macos 10.15, ios 13, watchos 6, tvos 13, *)
+# define CFFI_CHECK_FFI_CLOSURE_ALLOC_MAYBE 1
+# define CFFI_CHECK_FFI_PREP_CLOSURE_LOC __builtin_available(macos 10.15, ios 13, watchos 6, tvos 13, *)
+# define CFFI_CHECK_FFI_PREP_CLOSURE_LOC_MAYBE 1
+# define CFFI_CHECK_FFI_PREP_CIF_VAR __builtin_available(macos 10.15, ios 13, watchos 6, tvos 13, *)
+# define CFFI_CHECK_FFI_PREP_CIF_VAR_MAYBE 1
+
+#else
+
+# define CFFI_CHECK_FFI_CLOSURE_ALLOC 0
+# define CFFI_CHECK_FFI_CLOSURE_ALLOC_MAYBE 0
+# define CFFI_CHECK_FFI_PREP_CLOSURE_LOC 0
+# define CFFI_CHECK_FFI_PREP_CLOSURE_LOC_MAYBE 0
+# define CFFI_CHECK_FFI_PREP_CIF_VAR 0
+# define CFFI_CHECK_FFI_PREP_CIF_VAR_MAYBE 0
+
 #endif
 
-#ifndef CFFI_TRUST_LIBFFI
-# include "malloc_closure.h"
-#endif
+/* always include this, even if it turns out not to be used on NetBSD
+   because the calls into it are behind "if (0)" */
+#include "malloc_closure.h"
 
 
 #if PY_MAJOR_VERSION >= 3
@@ -148,6 +176,14 @@
     (PyCObject_FromVoidPtr(pointer, destructor))
 #endif
 
+#if PY_VERSION_HEX < 0x030900a4
+# define Py_SET_REFCNT(obj, val) (Py_REFCNT(obj) = (val))
+#endif
+
+#if PY_VERSION_HEX >= 0x03080000
+# define USE_WRITEUNRAISABLEMSG
+#endif
+
 /************************************************************/
 
 /* base type flag: exactly one of the following: */
@@ -174,7 +210,7 @@
 #define CT_IS_BOOL             0x00080000
 #define CT_IS_FILE             0x00100000
 #define CT_IS_VOID_PTR         0x00200000
-#define CT_WITH_VAR_ARRAY      0x00400000
+#define CT_WITH_VAR_ARRAY      0x00400000 /* with open-ended array, anywhere */
 /* unused                      0x00800000 */
 #define CT_LAZY_FIELD_LIST     0x01000000
 #define CT_WITH_PACKED_CHANGE  0x02000000
@@ -238,12 +274,14 @@
 static PyTypeObject CData_Type;
 static PyTypeObject CDataOwning_Type;
 static PyTypeObject CDataOwningGC_Type;
+static PyTypeObject CDataFromBuf_Type;
 static PyTypeObject CDataGCP_Type;
 
 #define CTypeDescr_Check(ob)  (Py_TYPE(ob) == &CTypeDescr_Type)
 #define CData_Check(ob)       (Py_TYPE(ob) == &CData_Type ||            \
                                Py_TYPE(ob) == &CDataOwning_Type ||      \
                                Py_TYPE(ob) == &CDataOwningGC_Type ||    \
+                               Py_TYPE(ob) == &CDataFromBuf_Type ||     \
                                Py_TYPE(ob) == &CDataGCP_Type)
 #define CDataOwn_Check(ob)    (Py_TYPE(ob) == &CDataOwning_Type ||      \
                                Py_TYPE(ob) == &CDataOwningGC_Type)
@@ -277,14 +315,14 @@
 
 typedef struct {
     CDataObject head;
-    PyObject *structobj;
+    PyObject *structobj;   /* for ffi.new_handle() or ffi.new("struct *") */
 } CDataObject_own_structptr;
 
 typedef struct {
     CDataObject head;
     Py_ssize_t length;     /* same as CDataObject_own_length up to here */
     Py_buffer *bufferview;
-} CDataObject_owngc_frombuf;
+} CDataObject_frombuf;
 
 typedef struct {
     CDataObject head;
@@ -402,10 +440,10 @@
 
     if (ct->ct_unique_key != NULL) {
         /* revive dead object temporarily for DelItem */
-        Py_REFCNT(ct) = 43;
+        Py_SET_REFCNT(ct, 43);
         PyDict_DelItem(unique_cache, ct->ct_unique_key);
         assert(Py_REFCNT(ct) == 42);
-        Py_REFCNT(ct) = 0;
+        Py_SET_REFCNT(ct, 0);
         Py_DECREF(ct->ct_unique_key);
     }
     Py_XDECREF(ct->ct_itemdescr);
@@ -651,7 +689,7 @@
 
 static PyTypeObject CTypeDescr_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "_cffi_backend.CTypeDescr",
+    "_cffi_backend.CType",
     offsetof(CTypeDescrObject, ct_name),
     sizeof(char),
     (destructor)ctypedescr_dealloc,             /* tp_dealloc */
@@ -1331,6 +1369,29 @@
 }
 
 static int
+add_varsize_length(Py_ssize_t offset, Py_ssize_t itemsize,
+                   Py_ssize_t varsizelength, Py_ssize_t *optvarsize)
+{
+    /* update '*optvarsize' to account for an array of 'varsizelength'
+       elements, each of size 'itemsize', that starts at 'offset'. */
+    Py_ssize_t size = ADD_WRAPAROUND(offset,
+                              MUL_WRAPAROUND(itemsize, varsizelength));
+    if (size < 0 ||
+        ((size - offset) / itemsize) != varsizelength) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "array size would overflow a Py_ssize_t");
+        return -1;
+    }
+    if (size > *optvarsize)
+        *optvarsize = size;
+    return 0;
+}
+
+static int
+convert_struct_from_object(char *data, CTypeDescrObject *ct, PyObject *init,
+                           Py_ssize_t *optvarsize);  /* forward */
+
+static int
 convert_vfield_from_object(char *data, CFieldObject *cf, PyObject *value,
                            Py_ssize_t *optvarsize)
 {
@@ -1343,20 +1404,11 @@
         if (optvarsize != NULL) {
             /* in this mode, the only purpose of this function is to compute
                the real size of the structure from a var-sized C99 array */
-            Py_ssize_t size, itemsize;
             assert(data == NULL);
-            itemsize = cf->cf_type->ct_itemdescr->ct_size;
-            size = ADD_WRAPAROUND(cf->cf_offset,
-                                  MUL_WRAPAROUND(itemsize, varsizelength));
-            if (size < 0 ||
-                ((size - cf->cf_offset) / itemsize) != varsizelength) {
-                PyErr_SetString(PyExc_OverflowError,
-                                "array size would overflow a Py_ssize_t");
-                return -1;
-            }
-            if (size > *optvarsize)
-                *optvarsize = size;
-            return 0;
+            return add_varsize_length(cf->cf_offset,
+                cf->cf_type->ct_itemdescr->ct_size,
+                varsizelength,
+                optvarsize);
         }
         /* if 'value' was only an integer, get_new_array_length() returns
            it and convert 'value' to be None.  Detect if this was the case,
@@ -1365,8 +1417,16 @@
         if (value == Py_None)
             return 0;
     }
-    if (optvarsize == NULL)
+    if (optvarsize == NULL) {
         return convert_field_from_object(data, cf, value);
+    }
+    else if ((cf->cf_type->ct_flags & CT_WITH_VAR_ARRAY) != 0 &&
+             !CData_Check(value)) {
+        Py_ssize_t subsize = cf->cf_type->ct_size;
+        if (convert_struct_from_object(NULL, cf->cf_type, value, &subsize) < 0)
+            return -1;
+        return add_varsize_length(cf->cf_offset, 1, subsize, optvarsize);
+    }
     else
         return 0;
 }
@@ -1831,6 +1891,7 @@
     assert(!(cd->c_type->ct_flags & (CT_IS_VOID_PTR | CT_FUNCTIONPTR)));
 
     if (cd->c_type->ct_flags & CT_IS_PTR_TO_OWNED) {
+        /* for ffi.new("struct *") */
         Py_DECREF(((CDataObject_own_structptr *)cd)->structobj);
     }
 #if defined(CFFI_MEM_DEBUG) || defined(CFFI_MEM_LEAK)
@@ -1851,9 +1912,6 @@
 
 static void cdataowninggc_dealloc(CDataObject *cd)
 {
-    assert(!(cd->c_type->ct_flags & (CT_IS_PTR_TO_OWNED |
-                                     CT_PRIMITIVE_ANY |
-                                     CT_STRUCT | CT_UNION)));
     PyObject_GC_UnTrack(cd);
 
     if (cd->c_type->ct_flags & CT_IS_VOID_PTR) {        /* a handle */
@@ -1864,20 +1922,28 @@
         ffi_closure *closure = ((CDataObject_closure *)cd)->closure;
         PyObject *args = (PyObject *)(closure->user_data);
         Py_XDECREF(args);
-#ifdef CFFI_TRUST_LIBFFI
-        ffi_closure_free(closure);
-#else
-        cffi_closure_free(closure);
+#if CFFI_CHECK_FFI_CLOSURE_ALLOC_MAYBE
+        if (CFFI_CHECK_FFI_CLOSURE_ALLOC) {
+            ffi_closure_free(closure);
+        } else
 #endif
+            cffi_closure_free(closure);
     }
-    else if (cd->c_type->ct_flags & CT_ARRAY) {         /* from_buffer */
-        Py_buffer *view = ((CDataObject_owngc_frombuf *)cd)->bufferview;
-        PyBuffer_Release(view);
-        PyObject_Free(view);
+    else {
+        Py_FatalError("cdata CDataOwningGC_Type with unexpected type flags");
     }
     cdata_dealloc(cd);
 }
 
+static void cdatafrombuf_dealloc(CDataObject *cd)
+{
+    Py_buffer *view = ((CDataObject_frombuf *)cd)->bufferview;
+    cdata_dealloc(cd);
+
+    PyBuffer_Release(view);
+    PyObject_Free(view);
+}
+
 static int cdataowninggc_traverse(CDataObject *cd, visitproc visit, void *arg)
 {
     if (cd->c_type->ct_flags & CT_IS_VOID_PTR) {        /* a handle */
@@ -1889,10 +1955,13 @@
         PyObject *args = (PyObject *)(closure->user_data);
         Py_VISIT(args);
     }
-    else if (cd->c_type->ct_flags & CT_ARRAY) {         /* from_buffer */
-        Py_buffer *view = ((CDataObject_owngc_frombuf *)cd)->bufferview;
-        Py_VISIT(view->obj);
-    }
+    return 0;
+}
+
+static int cdatafrombuf_traverse(CDataObject *cd, visitproc visit, void *arg)
+{
+    Py_buffer *view = ((CDataObject_frombuf *)cd)->bufferview;
+    Py_VISIT(view->obj);
     return 0;
 }
 
@@ -1911,10 +1980,13 @@
         closure->user_data = NULL;
         Py_XDECREF(args);
     }
-    else if (cd->c_type->ct_flags & CT_ARRAY) {         /* from_buffer */
-        Py_buffer *view = ((CDataObject_owngc_frombuf *)cd)->bufferview;
-        PyBuffer_Release(view);
-    }
+    return 0;
+}
+
+static int cdatafrombuf_clear(CDataObject *cd)
+{
+    Py_buffer *view = ((CDataObject_frombuf *)cd)->bufferview;
+    PyBuffer_Release(view);
     return 0;
 }
 
@@ -2096,6 +2168,35 @@
     return -1;
 }
 
+static PyObject *_frombuf_repr(CDataObject *cd, const char *cd_type_name)
+{
+    Py_buffer *view = ((CDataObject_frombuf *)cd)->bufferview;
+    const char *obj_tp_name;
+    if (view->obj == NULL) {
+        return PyText_FromFormat(
+            "<cdata '%s' buffer RELEASED>",
+            cd_type_name);
+    }
+
+    obj_tp_name = Py_TYPE(view->obj)->tp_name;
+    if (cd->c_type->ct_flags & CT_ARRAY)
+    {
+        Py_ssize_t buflen = get_array_length(cd);
+        return PyText_FromFormat(
+            "<cdata '%s' buffer len %zd from '%.200s' object>",
+            cd_type_name,
+            buflen,
+            obj_tp_name);
+    }
+    else
+    {
+        return PyText_FromFormat(
+            "<cdata '%s' buffer from '%.200s' object>",
+            cd_type_name,
+            obj_tp_name);
+    }
+}
+
 static PyObject *cdataowning_repr(CDataObject *cd)
 {
     Py_ssize_t size = _cdata_var_byte_size(cd);
@@ -2125,16 +2226,12 @@
         else
             return _cdata_repr2(cd, "calling", PyTuple_GET_ITEM(args, 1));
     }
-    else if (cd->c_type->ct_flags & CT_ARRAY) {         /* from_buffer */
-        Py_buffer *view = ((CDataObject_owngc_frombuf *)cd)->bufferview;
-        Py_ssize_t buflen = get_array_length(cd);
-        return PyText_FromFormat(
-            "<cdata '%s' buffer len %zd from '%.200s' object>",
-            cd->c_type->ct_name,
-            buflen,
-            view->obj ? Py_TYPE(view->obj)->tp_name : "(null)");
-    }
-    return cdataowning_repr(cd);
+    return cdataowning_repr(cd);   /* but should be unreachable */
+}
+
+static PyObject *cdatafrombuf_repr(CDataObject *cd)
+{
+    return _frombuf_repr(cd, cd->c_type->ct_name);
 }
 
 static int cdata_nonzero(CDataObject *cd)
@@ -2171,7 +2268,10 @@
         return PyInt_FromLong(value);
     }
     if (cd->c_type->ct_flags & (CT_PRIMITIVE_SIGNED|CT_PRIMITIVE_UNSIGNED)) {
-        return convert_to_object(cd->c_data, cd->c_type);
+        PyObject *result = convert_to_object(cd->c_data, cd->c_type);
+        if (result != NULL && PyBool_Check(result))
+            result = PyInt_FromLong(PyInt_AsLong(result));
+        return result;
     }
     else if (cd->c_type->ct_flags & CT_PRIMITIVE_CHAR) {
         /*READ(cd->c_data, cd->c_type->ct_size)*/
@@ -2313,7 +2413,11 @@
     return pyres;
 }
 
-static long cdata_hash(CDataObject *v)
+#if PY_MAJOR_VERSION < 3
+typedef long Py_hash_t;
+#endif
+
+static Py_hash_t cdata_hash(PyObject *v)
 {
     if (((CDataObject *)v)->c_type->ct_flags & CT_PRIMITIVE_ANY) {
         PyObject *vv = convert_to_object(((CDataObject *)v)->c_data,
@@ -2321,13 +2425,13 @@
         if (vv == NULL)
             return -1;
         if (!CData_Check(vv)) {
-            long hash = PyObject_Hash(vv);
+            Py_hash_t hash = PyObject_Hash(vv);
             Py_DECREF(vv);
             return hash;
         }
         Py_DECREF(vv);
     }
-    return _Py_HashPointer(v->c_data);
+    return _Py_HashPointer(((CDataObject *)v)->c_data);
 }
 
 static Py_ssize_t
@@ -2821,7 +2925,8 @@
 convert_struct_to_owning_object(char *data, CTypeDescrObject *ct); /*forward*/
 
 static cif_description_t *
-fb_prepare_cif(PyObject *fargs, CTypeDescrObject *, ffi_abi);      /*forward*/
+fb_prepare_cif(PyObject *fargs, CTypeDescrObject *, Py_ssize_t, ffi_abi);
+                                                                   /*forward*/
 
 static PyObject *new_primitive_type(const char *name);             /*forward*/
 
@@ -2924,12 +3029,22 @@
     CTypeDescrObject *fresult;
     char *resultdata;
     char *errormsg;
+    struct freeme_s {
+        struct freeme_s *next;
+        union_alignment alignment;
+    } *freeme = NULL;
 
     if (!(cd->c_type->ct_flags & CT_FUNCTIONPTR)) {
         PyErr_Format(PyExc_TypeError, "cdata '%s' is not callable",
                      cd->c_type->ct_name);
         return NULL;
     }
+    if (cd->c_data == NULL) {
+        PyErr_Format(PyExc_RuntimeError,
+                     "cannot call null pointer pointer from cdata '%s'",
+                     cd->c_type->ct_name);
+        return NULL;
+    }
     if (kwds != NULL && PyDict_Size(kwds) != 0) {
         PyErr_SetString(PyExc_TypeError,
                 "a cdata function cannot be called with keyword arguments");
@@ -3004,7 +3119,7 @@
 #else
         fabi = PyLong_AS_LONG(PyTuple_GET_ITEM(signature, 0));
 #endif
-        cif_descr = fb_prepare_cif(fvarargs, fresult, fabi);
+        cif_descr = fb_prepare_cif(fvarargs, fresult, nargs_declared, fabi);
         if (cif_descr == NULL)
             goto error;
     }
@@ -3038,7 +3153,21 @@
             else if (datasize < 0)
                 goto error;
             else {
-                tmpbuf = alloca(datasize);
+                if (datasize <= 512) {
+                    tmpbuf = alloca(datasize);
+                }
+                else {
+                    struct freeme_s *fp = (struct freeme_s *)PyObject_Malloc(
+                        offsetof(struct freeme_s, alignment) +
+                        (size_t)datasize);
+                    if (fp == NULL) {
+                        PyErr_NoMemory();
+                        goto error;
+                    }
+                    fp->next = freeme;
+                    freeme = fp;
+                    tmpbuf = (char *)&fp->alignment;
+                }
                 memset(tmpbuf, 0, datasize);
                 *(char **)data = tmpbuf;
                 if (convert_array_from_object(tmpbuf, argtype, obj) < 0)
@@ -3083,6 +3212,11 @@
     /* fall-through */
 
  error:
+    while (freeme != NULL) {
+        void *p = (void *)freeme;
+        freeme = freeme->next;
+        PyObject_Free(p);
+    }
     if (buffer)
         PyObject_Free(buffer);
     if (fvarargs != NULL) {
@@ -3139,9 +3273,8 @@
         if ((ct->ct_flags & (CT_POINTER | CT_ARRAY)) != 0)   /* ffi.new() */
             return 0;
     }
-    else if (Py_TYPE(cd) == &CDataOwningGC_Type) {
-        if (ct->ct_flags & CT_ARRAY)      /* ffi.from_buffer() */
-            return 1;
+    else if (Py_TYPE(cd) == &CDataFromBuf_Type) {
+        return 1;    /* ffi.from_buffer() */
     }
     else if (Py_TYPE(cd) == &CDataGCP_Type) {
         return 2;    /* ffi.gc() */
@@ -3178,14 +3311,14 @@
                 PyObject *x = ((CDataObject_own_structptr *)cd)->structobj;
                 if (Py_TYPE(x) == &CDataGCP_Type) {
                     /* this is a special case for
-                       ffi.new_allocator()("struct-or-union") */
+                       ffi.new_allocator()("struct-or-union *") */
                     cdatagcp_finalize((CDataObject_gcp *)x);
                 }
             }
             break;
 
         case 1:    /* ffi.from_buffer() */
-            view = ((CDataObject_owngc_frombuf *)cd)->bufferview;
+            view = ((CDataObject_frombuf *)cd)->bufferview;
             PyBuffer_Release(view);
             break;
 
@@ -3259,7 +3392,7 @@
 
 static PyTypeObject CData_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "_cffi_backend.CData",
+    "_cffi_backend._CDataBase",
     sizeof(CDataObject),
     0,
     (destructor)cdata_dealloc,                  /* tp_dealloc */
@@ -3271,14 +3404,16 @@
     &CData_as_number,                           /* tp_as_number */
     0,                                          /* tp_as_sequence */
     &CData_as_mapping,                          /* tp_as_mapping */
-    (hashfunc)cdata_hash,                       /* tp_hash */
+    cdata_hash,                                 /* tp_hash */
     (ternaryfunc)cdata_call,                    /* tp_call */
     0,                                          /* tp_str */
     (getattrofunc)cdata_getattro,               /* tp_getattro */
     (setattrofunc)cdata_setattro,               /* tp_setattro */
     0,                                          /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */
-    0,                                          /* tp_doc */
+    "The internal base type for CData objects.  Use FFI.CData to access "
+    "it.  Always check with isinstance(): subtypes are sometimes returned "
+    "on CPython, for performance reasons.",     /* tp_doc */
     0,                                          /* tp_traverse */
     0,                                          /* tp_clear */
     cdata_richcompare,                          /* tp_richcompare */
@@ -3301,7 +3436,7 @@
 
 static PyTypeObject CDataOwning_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "_cffi_backend.CDataOwn",
+    "_cffi_backend.__CDataOwn",
     sizeof(CDataObject),
     0,
     (destructor)cdataowning_dealloc,            /* tp_dealloc */
@@ -3320,7 +3455,8 @@
     0,  /* inherited */                         /* tp_setattro */
     0,                                          /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */
-    0,                                          /* tp_doc */
+    "This is an internal subtype of _CDataBase for performance only on "
+    "CPython.  Check with isinstance(x, ffi.CData).",   /* tp_doc */
     0,                                          /* tp_traverse */
     0,                                          /* tp_clear */
     0,  /* inherited */                         /* tp_richcompare */
@@ -3343,8 +3479,8 @@
 
 static PyTypeObject CDataOwningGC_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "_cffi_backend.CDataOwnGC",
-    sizeof(CDataObject_owngc_frombuf),
+    "_cffi_backend.__CDataOwnGC",
+    sizeof(CDataObject_own_structptr),
     0,
     (destructor)cdataowninggc_dealloc,          /* tp_dealloc */
     0,                                          /* tp_print */
@@ -3363,7 +3499,8 @@
     0,                                          /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES  /* tp_flags */
                        | Py_TPFLAGS_HAVE_GC,
-    0,                                          /* tp_doc */
+    "This is an internal subtype of _CDataBase for performance only on "
+    "CPython.  Check with isinstance(x, ffi.CData).",   /* tp_doc */
     (traverseproc)cdataowninggc_traverse,       /* tp_traverse */
     (inquiry)cdataowninggc_clear,               /* tp_clear */
     0,  /* inherited */                         /* tp_richcompare */
@@ -3384,9 +3521,53 @@
     PyObject_GC_Del,                            /* tp_free */
 };
 
+static PyTypeObject CDataFromBuf_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "_cffi_backend.__CDataFromBuf",
+    sizeof(CDataObject_frombuf),
+    0,
+    (destructor)cdatafrombuf_dealloc,           /* tp_dealloc */
+    0,                                          /* tp_print */
+    0,                                          /* tp_getattr */
+    0,                                          /* tp_setattr */
+    0,                                          /* tp_compare */
+    (reprfunc)cdatafrombuf_repr,                /* tp_repr */
+    0,  /* inherited */                         /* tp_as_number */
+    0,                                          /* tp_as_sequence */
+    0,  /* inherited */                         /* tp_as_mapping */
+    0,  /* inherited */                         /* tp_hash */
+    0,  /* inherited */                         /* tp_call */
+    0,                                          /* tp_str */
+    0,  /* inherited */                         /* tp_getattro */
+    0,  /* inherited */                         /* tp_setattro */
+    0,                                          /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES  /* tp_flags */
+                       | Py_TPFLAGS_HAVE_GC,
+    "This is an internal subtype of _CDataBase for performance only on "
+    "CPython.  Check with isinstance(x, ffi.CData).",   /* tp_doc */
+    (traverseproc)cdatafrombuf_traverse,        /* tp_traverse */
+    (inquiry)cdatafrombuf_clear,                /* tp_clear */
+    0,  /* inherited */                         /* tp_richcompare */
+    0,  /* inherited */                         /* tp_weaklistoffset */
+    0,  /* inherited */                         /* tp_iter */
+    0,                                          /* tp_iternext */
+    0,  /* inherited */                         /* tp_methods */
+    0,                                          /* tp_members */
+    0,                                          /* tp_getset */
+    &CData_Type,                                /* tp_base */
+    0,                                          /* tp_dict */
+    0,                                          /* tp_descr_get */
+    0,                                          /* tp_descr_set */
+    0,                                          /* tp_dictoffset */
+    0,                                          /* tp_init */
+    0,                                          /* tp_alloc */
+    0,                                          /* tp_new */
+    PyObject_GC_Del,                            /* tp_free */
+};
+
 static PyTypeObject CDataGCP_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "_cffi_backend.CDataGCP",
+    "_cffi_backend.__CDataGCP",
     sizeof(CDataObject_gcp),
     0,
     (destructor)cdatagcp_dealloc,               /* tp_dealloc */
@@ -3409,7 +3590,8 @@
                        | Py_TPFLAGS_HAVE_FINALIZE
 #endif
                        | Py_TPFLAGS_HAVE_GC,
-    0,                                          /* tp_doc */
+    "This is an internal subtype of _CDataBase for performance only on "
+    "CPython.  Check with isinstance(x, ffi.CData).",   /* tp_doc */
     (traverseproc)cdatagcp_traverse,            /* tp_traverse */
     0,                                          /* tp_clear */
     0,  /* inherited */                         /* tp_richcompare */
@@ -3470,7 +3652,7 @@
 
 static PyTypeObject CDataIter_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "_cffi_backend.CDataIter",              /* tp_name */
+    "_cffi_backend.__CData_iterator",       /* tp_name */
     sizeof(CDataIterObject),                /* tp_basicsize */
     0,                                      /* tp_itemsize */
     /* methods */
@@ -4086,11 +4268,12 @@
     PyObject_HEAD
     void *dl_handle;
     char *dl_name;
+    int dl_auto_close;
 } DynLibObject;
 
 static void dl_dealloc(DynLibObject *dlobj)
 {
-    if (dlobj->dl_handle != NULL)
+    if (dlobj->dl_handle != NULL && dlobj->dl_auto_close)
         dlclose(dlobj->dl_handle);
     free(dlobj->dl_name);
     PyObject_Del(dlobj);
@@ -4224,7 +4407,7 @@
 
 static PyTypeObject dl_type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "_cffi_backend.Library",            /* tp_name */
+    "_cffi_backend.CLibrary",           /* tp_name */
     sizeof(DynLibObject),               /* tp_basicsize */
     0,                                  /* tp_itemsize */
     /* methods */
@@ -4255,7 +4438,7 @@
 };
 
 static void *b_do_dlopen(PyObject *args, const char **p_printable_filename,
-                         PyObject **p_temp)
+                         PyObject **p_temp, int *auto_close)
 {
     /* Logic to call the correct version of dlopen().  Returns NULL in case of error.
        Otherwise, '*p_printable_filename' will point to a printable char version of
@@ -4266,6 +4449,7 @@
     char *filename_or_null;
     int flags = 0;
     *p_temp = NULL;
+    *auto_close = 1;
     
     if (PyTuple_GET_SIZE(args) == 0 || PyTuple_GET_ITEM(args, 0) == Py_None) {
         PyObject *dummy;
@@ -4275,13 +4459,37 @@
         filename_or_null = NULL;
         *p_printable_filename = "<None>";
     }
+    else if (CData_Check(PyTuple_GET_ITEM(args, 0)))
+    {
+        CDataObject *cd;
+        if (!PyArg_ParseTuple(args, "O|i:load_library", &cd, &flags))
+            return NULL;
+        /* 'flags' is accepted but ignored in this case */
+        if ((cd->c_type->ct_flags & CT_IS_VOID_PTR) == 0) {
+            PyErr_Format(PyExc_TypeError,
+                "dlopen() takes a file name or 'void *' handle, not '%s'",
+                cd->c_type->ct_name);
+            return NULL;
+        }
+        handle = cd->c_data;
+        if (handle == NULL) {
+            PyErr_Format(PyExc_RuntimeError, "cannot call dlopen(NULL)");
+            return NULL;
+        }
+        *p_temp = PyText_FromFormat("%p", handle);
+        *p_printable_filename = PyText_AsUTF8(*p_temp);
+        *auto_close = 0;
+        return handle;
+    }
     else
     {
         PyObject *s = PyTuple_GET_ITEM(args, 0);
 #ifdef MS_WIN32
-        Py_UNICODE *filenameW;
-        if (PyArg_ParseTuple(args, "u|i:load_library", &filenameW, &flags))
+        PyObject *filename_unicode;
+        if (PyArg_ParseTuple(args, "U|i:load_library", &filename_unicode, &flags))
         {
+            Py_ssize_t sz1;
+            wchar_t *w1;
 #if PY_MAJOR_VERSION < 3
             s = PyUnicode_AsUTF8String(s);
             if (s == NULL)
@@ -4292,7 +4500,15 @@
             if (*p_printable_filename == NULL)
                 return NULL;
 
-            handle = dlopenW(filenameW);
+            sz1 = PyUnicode_GetSize(filename_unicode) + 1;
+            sz1 *= 2;   /* should not be needed, but you never know */
+            w1 = alloca(sizeof(wchar_t) * sz1);
+            sz1 = PyUnicode_AsWideChar((PyUnicodeObject *)filename_unicode,
+                                       w1, sz1 - 1);
+            if (sz1 < 0)
+                return NULL;
+            w1[sz1] = 0;
+            handle = dlopenW(w1);
             goto got_handle;
         }
         PyErr_Clear();
@@ -4304,19 +4520,31 @@
         if (PyUnicode_Check(s))
         {
             s = PyUnicode_AsUTF8String(s);
-            if (s == NULL)
+            if (s == NULL) {
+                PyMem_Free(filename_or_null);
                 return NULL;
+            }
             *p_temp = s;
         }
 #endif
         *p_printable_filename = PyText_AsUTF8(s);
-        if (*p_printable_filename == NULL)
+        if (*p_printable_filename == NULL) {
+            PyMem_Free(filename_or_null);
             return NULL;
+        }
     }
     if ((flags & (RTLD_NOW | RTLD_LAZY)) == 0)
         flags |= RTLD_NOW;
 
+#ifdef MS_WIN32
+    if (filename_or_null == NULL) {
+        PyErr_SetString(PyExc_OSError, "dlopen(None) not supported on Windows");
+        return NULL;
+    }
+#endif
+
     handle = dlopen(filename_or_null, flags);
+    PyMem_Free(filename_or_null);
 
 #ifdef MS_WIN32
   got_handle:
@@ -4336,8 +4564,9 @@
     PyObject *temp;
     void *handle;
     DynLibObject *dlobj = NULL;
+    int auto_close;
 
-    handle = b_do_dlopen(args, &printable_filename, &temp);
+    handle = b_do_dlopen(args, &printable_filename, &temp, &auto_close);
     if (handle == NULL)
         goto error;
 
@@ -4348,6 +4577,7 @@
     }
     dlobj->dl_handle = handle;
     dlobj->dl_name = strdup(printable_filename);
+    dlobj->dl_auto_close = auto_close;
  
  error:
     Py_XDECREF(temp);
@@ -4831,7 +5061,9 @@
 #ifdef MS_WIN32
         sflags |= SF_MSVC_BITFIELDS;
 #else
-# if defined(__arm__) || defined(__aarch64__)
+# if defined(__APPLE__) && defined(__arm64__)
+        sflags |= SF_GCC_X86_BITFIELDS;
+# elif defined(__arm__) || defined(__aarch64__)
         sflags |= SF_GCC_ARM_BITFIELDS;
 # else
         sflags |= SF_GCC_X86_BITFIELDS;
@@ -4859,8 +5091,8 @@
         if (sflags & SF_STD_FIELD_POS) {
             PyErr_Format(FFIError,
                          "%s: %s%s%s (cdef says %zd, but C compiler says %zd)."
-                         " fix it or use \"...;\" in the cdef for %s to "
-                         "make it flexible",
+                         " fix it or use \"...;\" as the last field in the "
+                         "cdef for %s to make it flexible",
                          ct->ct_name, msg1, txt, msg2,
                          cdef_value, compiler_value,
                          ct->ct_name);
@@ -4871,12 +5103,16 @@
     return 0;
 }
 
+#define ROUNDUP_BYTES(bytes, bits)    ((bytes) + ((bits) > 0))
+
 static PyObject *b_complete_struct_or_union(PyObject *self, PyObject *args)
 {
     CTypeDescrObject *ct;
     PyObject *fields, *interned_fields, *ignored;
     int is_union, alignment;
-    Py_ssize_t boffset, i, nb_fields, boffsetmax, alignedsize, boffsetorg;
+    Py_ssize_t byteoffset, i, nb_fields, byteoffsetmax, alignedsize;
+    int bitoffset;
+    Py_ssize_t byteoffsetorg;
     Py_ssize_t totalsize = -1;
     int totalalignment = -1;
     CFieldObject **previous;
@@ -4915,8 +5151,9 @@
     ct->ct_flags &= ~(CT_CUSTOM_FIELD_POS | CT_WITH_PACKED_CHANGE);
 
     alignment = 1;
-    boffset = 0;         /* this number is in *bits*, not bytes! */
-    boffsetmax = 0;      /* the maximum value of boffset, in bits too */
+    byteoffset = 0;     /* the real value is 'byteoffset*8+bitoffset', which */
+    bitoffset = 0;      /* counts the offset in bits */
+    byteoffsetmax = 0; /* the maximum value of byteoffset-rounded-up-to-byte */
     prev_bitfield_size = 0;
     prev_bitfield_free = 0;
     nb_fields = PyList_GET_SIZE(fields);
@@ -4951,9 +5188,22 @@
                 goto error;
             }
         }
+        else if (ftype->ct_flags & (CT_STRUCT|CT_UNION)) {
+            if (force_lazy_struct(ftype) < 0)   /* for CT_WITH_VAR_ARRAY */
+                goto error;     /* exit like the other failures in this loop */
+
+            /* GCC (or maybe C99) accepts var-sized struct fields that are not
+               the last field of a larger struct.  That's why there is no
+               check here for "last field": we propagate the flag
+               CT_WITH_VAR_ARRAY to any struct that contains either an open-
+               ended array or another struct that recursively contains an
+               open-ended array. */
+            if (ftype->ct_flags & CT_WITH_VAR_ARRAY)
+                ct->ct_flags |= CT_WITH_VAR_ARRAY;
+        }
 
         if (is_union)
-            boffset = 0;   /* reset each field at offset 0 */
+            byteoffset = bitoffset = 0;   /* reset each field at offset 0 */
 
         /* update the total alignment requirement, but skip it if the
            field is an anonymous bitfield or if SF_PACKED */
@@ -4988,20 +5238,26 @@
                 bs_flag = BS_REGULAR;
 
             /* align this field to its own 'falign' by inserting padding */
-            boffsetorg = (boffset + falignorg*8-1) & ~(falignorg*8-1); /*bits!*/
-            boffset = (boffset + falign*8-1) & ~(falign*8-1); /* bits! */
-            if (boffsetorg != boffset) {
+
+            /* first, pad to the next byte,
+             * then pad to 'falign' or 'falignorg' bytes */
+            byteoffset = ROUNDUP_BYTES(byteoffset, bitoffset);
+            bitoffset = 0;
+            byteoffsetorg = (byteoffset + falignorg-1) & ~(falignorg-1);
+            byteoffset = (byteoffset + falign-1) & ~(falign-1);
+
+            if (byteoffsetorg != byteoffset) {
                 ct->ct_flags |= CT_WITH_PACKED_CHANGE;
             }
 
             if (foffset >= 0) {
                 /* a forced field position: ignore the offset just computed,
                    except to know if we must set CT_CUSTOM_FIELD_POS */
-                if (detect_custom_layout(ct, sflags, boffset / 8, foffset,
+                if (detect_custom_layout(ct, sflags, byteoffset, foffset,
                                          "wrong offset for field '",
                                          PyText_AS_UTF8(fname), "'") < 0)
                     goto error;
-                boffset = foffset * 8;
+                byteoffset = foffset;
             }
 
             if (PyText_GetSize(fname) == 0 &&
@@ -5015,7 +5271,7 @@
                     *previous = _add_field(interned_fields,
                                            get_field_name(ftype, cfsrc),
                                            cfsrc->cf_type,
-                                           boffset / 8 + cfsrc->cf_offset,
+                                           byteoffset + cfsrc->cf_offset,
                                            cfsrc->cf_bitshift,
                                            cfsrc->cf_bitsize,
                                            cfsrc->cf_flags | fflags);
@@ -5028,13 +5284,13 @@
             }
             else {
                 *previous = _add_field(interned_fields, fname, ftype,
-                                        boffset / 8, bs_flag, -1, fflags);
+                                       byteoffset, bs_flag, -1, fflags);
                 if (*previous == NULL)
                     goto error;
                 previous = &(*previous)->cf_next;
             }
             if (ftype->ct_size >= 0)
-                boffset += ftype->ct_size * 8;
+                byteoffset += ftype->ct_size;
             prev_bitfield_size = 0;
         }
         else {
@@ -5071,7 +5327,7 @@
             /* compute the starting position of the theoretical field
                that covers a complete 'ftype', inside of which we will
                locate the real bitfield */
-            field_offset_bytes = boffset / 8;
+            field_offset_bytes = byteoffset;
             field_offset_bytes &= ~(falign - 1);
 
             if (fbitsize == 0) {
@@ -5084,12 +5340,13 @@
                 if (!(sflags & SF_MSVC_BITFIELDS)) {
                     /* GCC's notion of "ftype :0;" */
 
-                    /* pad boffset to a value aligned for "ftype" */
-                    if (boffset > field_offset_bytes * 8) {
+                    /* pad byteoffset to a value aligned for "ftype" */
+                    if (ROUNDUP_BYTES(byteoffset, bitoffset) > field_offset_bytes) {
                         field_offset_bytes += falign;
-                        assert(boffset < field_offset_bytes * 8);
+                        assert(byteoffset < field_offset_bytes);
                     }
-                    boffset = field_offset_bytes * 8;
+                    byteoffset = field_offset_bytes;
+                    bitoffset = 0;
                 }
                 else {
                     /* MSVC's notion of "ftype :0;" */
@@ -5106,7 +5363,8 @@
 
                     /* Can the field start at the offset given by 'boffset'?  It
                        can if it would entirely fit into an aligned ftype field. */
-                    bits_already_occupied = boffset - (field_offset_bytes * 8);
+                    bits_already_occupied = (byteoffset-field_offset_bytes) * 8
+                        + bitoffset;
 
                     if (bits_already_occupied + fbitsize > 8 * ftype->ct_size) {
                         /* it would not fit, we need to start at the next
@@ -5120,15 +5378,18 @@
                             goto error;
                         }
                         field_offset_bytes += falign;
-                        assert(boffset < field_offset_bytes * 8);
-                        boffset = field_offset_bytes * 8;
+                        assert(byteoffset < field_offset_bytes);
+                        byteoffset = field_offset_bytes;
+                        bitoffset = 0;
                         bitshift = 0;
                     }
                     else {
                         bitshift = bits_already_occupied;
                         assert(bitshift >= 0);
                     }
-                    boffset += fbitsize;
+                    bitoffset += fbitsize;
+                    byteoffset += (bitoffset >> 3);
+                    bitoffset &= 7;
                 }
                 else {
                     /* MSVC's algorithm */
@@ -5144,38 +5405,43 @@
                     }
                     else {
                         /* no: start a new full field */
-                        boffset = (boffset + falign*8-1) & ~(falign*8-1); /*align*/
-                        boffset += ftype->ct_size * 8;
+                        byteoffset = ROUNDUP_BYTES(byteoffset, bitoffset);
+                        bitoffset = 0;
+                        /* align */
+                        byteoffset = (byteoffset + falign-1) & ~(falign-1);
+                        byteoffset += ftype->ct_size;
                         bitshift = 0;
                         prev_bitfield_size = ftype->ct_size;
                         prev_bitfield_free = 8 * prev_bitfield_size;
                     }
                     prev_bitfield_free -= fbitsize;
-                    field_offset_bytes = boffset / 8 - ftype->ct_size;
+                    field_offset_bytes = byteoffset - ftype->ct_size;
                 }
-
                 if (sflags & SF_GCC_BIG_ENDIAN)
                     bitshift = 8 * ftype->ct_size - fbitsize - bitshift;
 
-                *previous = _add_field(interned_fields, fname, ftype,
+                if (PyText_GetSize(fname) > 0) {
+
+                    *previous = _add_field(interned_fields, fname, ftype,
                                        field_offset_bytes, bitshift, fbitsize,
                                        fflags);
-                if (*previous == NULL)
-                    goto error;
-                previous = &(*previous)->cf_next;
+                    if (*previous == NULL)
+                        goto error;
+                    previous = &(*previous)->cf_next;
+                }
             }
         }
 
-        if (boffset > boffsetmax)
-            boffsetmax = boffset;
+        assert(bitoffset == (bitoffset & 7));
+        if (ROUNDUP_BYTES(byteoffset, bitoffset) > byteoffsetmax)
+            byteoffsetmax = ROUNDUP_BYTES(byteoffset, bitoffset);
     }
     *previous = NULL;
 
     /* Like C, if the size of this structure would be zero, we compute it
        as 1 instead.  But for ctypes support, we allow the manually-
        specified totalsize to be zero in this case. */
-    boffsetmax = (boffsetmax + 7) / 8;        /* bits -> bytes */
-    alignedsize = (boffsetmax + alignment - 1) & ~(alignment-1);
+    alignedsize = (byteoffsetmax + alignment - 1) & ~(alignment-1);
     if (alignedsize == 0)
         alignedsize = 1;
 
@@ -5186,10 +5452,10 @@
         if (detect_custom_layout(ct, sflags, alignedsize,
                                  totalsize, "wrong total size", "", "") < 0)
             goto error;
-        if (totalsize < boffsetmax) {
+        if (totalsize < byteoffsetmax) {
             PyErr_Format(PyExc_TypeError,
                          "%s cannot be of size %zd: there are fields at least "
-                         "up to %zd", ct->ct_name, totalsize, boffsetmax);
+                         "up to %zd", ct->ct_name, totalsize, byteoffsetmax);
             goto error;
         }
     }
@@ -5585,11 +5851,14 @@
 
 static cif_description_t *fb_prepare_cif(PyObject *fargs,
                                          CTypeDescrObject *fresult,
+                                         Py_ssize_t variadic_nargs_declared,
                                          ffi_abi fabi)
+
 {
     char *buffer;
     cif_description_t *cif_descr;
     struct funcbuilder_s funcbuffer;
+    ffi_status status = (ffi_status)-1;
 
     funcbuffer.nb_bytes = 0;
     funcbuffer.bufferp = NULL;
@@ -5612,8 +5881,24 @@
     assert(funcbuffer.bufferp == buffer + funcbuffer.nb_bytes);
 
     cif_descr = (cif_description_t *)buffer;
-    if (ffi_prep_cif(&cif_descr->cif, fabi, funcbuffer.nargs,
-                     funcbuffer.rtype, funcbuffer.atypes) != FFI_OK) {
+
+    /* use `ffi_prep_cif_var` if necessary and available */
+#if CFFI_CHECK_FFI_PREP_CIF_VAR_MAYBE
+    if (variadic_nargs_declared >= 0) {
+        if (CFFI_CHECK_FFI_PREP_CIF_VAR) {
+            status = ffi_prep_cif_var(&cif_descr->cif, fabi,
+                                      variadic_nargs_declared, funcbuffer.nargs,
+                                      funcbuffer.rtype, funcbuffer.atypes);
+        }
+    }
+#endif
+
+    if (status == (ffi_status)-1) {
+        status = ffi_prep_cif(&cif_descr->cif, fabi, funcbuffer.nargs,
+                              funcbuffer.rtype, funcbuffer.atypes);
+    }
+
+    if (status != FFI_OK) {
         PyErr_SetString(PyExc_SystemError,
                         "libffi failed to build this function type");
         goto error;
@@ -5657,7 +5942,7 @@
            is computed here. */
         cif_description_t *cif_descr;
 
-        cif_descr = fb_prepare_cif(fargs, fresult, fabi);
+        cif_descr = fb_prepare_cif(fargs, fresult, -1, fabi);
         if (cif_descr == NULL) {
             if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
                 PyErr_Clear();   /* will get the exception if we see an
@@ -5785,6 +6070,43 @@
                                       char *extra_error_line)
 {
     /* like PyErr_WriteUnraisable(), but write a full traceback */
+#ifdef USE_WRITEUNRAISABLEMSG
+
+    /* PyErr_WriteUnraisable actually writes the full traceback anyway
+       from Python 3.4, but we can't really get the formatting of the
+       custom text to be what we want.  We can do better from Python
+       3.8 by calling the new _PyErr_WriteUnraisableMsg().
+       Luckily it's also Python 3.8 that adds new functionality that
+       people might want: the new sys.unraisablehook().
+    */
+    PyObject *s;
+    int first_char;
+    assert(objdescr != NULL && objdescr[0] != 0);   /* non-empty */
+    first_char = objdescr[0];
+    if (first_char >= 'A' && first_char <= 'Z')
+        first_char += 'a' - 'A';    /* lower() the very first character */
+    if (extra_error_line == NULL)
+        extra_error_line = "";
+
+    if (obj != NULL)
+        s = PyUnicode_FromFormat("%c%s%R%s",
+            first_char, objdescr + 1, obj, extra_error_line);
+    else
+        s = PyUnicode_FromFormat("%c%s%s",
+            first_char, objdescr + 1, extra_error_line);
+
+    PyErr_Restore(t, v, tb);
+    if (s != NULL) {
+        _PyErr_WriteUnraisableMsg(PyText_AS_UTF8(s), NULL);
+        Py_DECREF(s);
+    }
+    else
+        PyErr_WriteUnraisable(obj);   /* best effort */
+    PyErr_Clear();
+
+#else
+
+    /* version for Python 2.7 and < 3.8 */
     PyObject *f;
 #if PY_MAJOR_VERSION >= 3
     /* jump through hoops to ensure the tb is attached to v, on Python 3 */
@@ -5809,6 +6131,8 @@
     Py_XDECREF(t);
     Py_XDECREF(v);
     Py_XDECREF(tb);
+
+#endif
 }
 
 static void general_invoke_callback(int decode_args_from_libffi,
@@ -5858,7 +6182,11 @@
         goto error;
     if (convert_from_object_fficallback(result, SIGNATURE(1), py_res,
                                         decode_args_from_libffi) < 0) {
+#ifdef USE_WRITEUNRAISABLEMSG
+        extra_error_line = ", trying to convert the result back to C";
+#else
         extra_error_line = "Trying to convert the result back to C:\n";
+#endif
         goto error;
     }
  done:
@@ -5910,10 +6238,16 @@
             _my_PyErr_WriteUnraisable(exc1, val1, tb1,
                                       "From cffi callback ", py_ob,
                                       extra_error_line);
+#ifdef USE_WRITEUNRAISABLEMSG
+            _my_PyErr_WriteUnraisable(exc2, val2, tb2,
+                 "during handling of the above exception by 'onerror'",
+                 NULL, NULL);
+#else
             extra_error_line = ("\nDuring the call to 'onerror', "
                                 "another exception occurred:\n\n");
             _my_PyErr_WriteUnraisable(exc2, val2, tb2,
                                       NULL, NULL, extra_error_line);
+#endif
             _cffi_stop_error_capture(ecap);
         }
     }
@@ -5981,15 +6315,28 @@
     infotuple = Py_BuildValue("OOOO", ct, ob, py_rawerr, onerror_ob);
     Py_DECREF(py_rawerr);
 
-#ifdef WITH_THREAD
+#if defined(WITH_THREAD) && PY_VERSION_HEX < 0x03070000
     /* We must setup the GIL here, in case the callback is invoked in
-       some other non-Pythonic thread.  This is the same as ctypes. */
+       some other non-Pythonic thread.  This is the same as ctypes.
+       But PyEval_InitThreads() is always a no-op from CPython 3.7
+       (the call from ctypes was removed some time later I think). */
     PyEval_InitThreads();
 #endif
 
     return infotuple;
 }
 
+/* messily try to silence a gcc/clang deprecation warning for
+   ffi_prep_closure.  Don't miss the "pragma pop" after the function.
+   This is done around the whole function because very old GCCs don't
+   support it inside a function. */
+#if defined(__clang__)
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#elif defined(__GNUC__)
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
 static PyObject *b_callback(PyObject *self, PyObject *args)
 {
     CTypeDescrObject *ct;
@@ -5998,6 +6345,7 @@
     PyObject *infotuple;
     cif_description_t *cif_descr;
     ffi_closure *closure;
+    ffi_status status;
     void *closure_exec;
 
     if (!PyArg_ParseTuple(args, "O!O|OO:callback", &CTypeDescr_Type, &ct, &ob,
@@ -6008,14 +6356,23 @@
     if (infotuple == NULL)
         return NULL;
 
-#ifdef CFFI_TRUST_LIBFFI
-    closure = ffi_closure_alloc(sizeof(ffi_closure), &closure_exec);
-#else
-    closure = cffi_closure_alloc();
-    closure_exec = closure;
+#if CFFI_CHECK_FFI_CLOSURE_ALLOC_MAYBE
+    if (CFFI_CHECK_FFI_CLOSURE_ALLOC) {
+        closure = ffi_closure_alloc(sizeof(ffi_closure), &closure_exec);
+    } else
 #endif
+    {
+        closure = cffi_closure_alloc();
+        closure_exec = closure;
+    }
+
     if (closure == NULL) {
         Py_DECREF(infotuple);
+        PyErr_SetString(PyExc_MemoryError,
+            "Cannot allocate write+execute memory for ffi.callback(). "
+            "You might be running on a system that prevents this. "
+            "For more information, see "
+            "https://cffi.readthedocs.io/en/latest/using.html#callbacks");
         return NULL;
     }
     cd = PyObject_GC_New(CDataObject_closure, &CDataOwningGC_Type);
@@ -6025,8 +6382,8 @@
     cd->head.c_type = ct;
     cd->head.c_data = (char *)closure_exec;
     cd->head.c_weakreflist = NULL;
+    closure->user_data = NULL;
     cd->closure = closure;
-    PyObject_GC_Track(cd);
 
     cif_descr = (cif_description_t *)ct->ct_extra;
     if (cif_descr == NULL) {
@@ -6035,17 +6392,30 @@
                      "return type or with '...'", ct->ct_name);
         goto error;
     }
-#ifdef CFFI_TRUST_LIBFFI
-    if (ffi_prep_closure_loc(closure, &cif_descr->cif,
-                         invoke_callback, infotuple, closure_exec) != FFI_OK) {
-#else
-    if (ffi_prep_closure(closure, &cif_descr->cif,
-                         invoke_callback, infotuple) != FFI_OK) {
+
+#if CFFI_CHECK_FFI_PREP_CLOSURE_LOC_MAYBE
+    if (CFFI_CHECK_FFI_PREP_CLOSURE_LOC) {
+        status = ffi_prep_closure_loc(closure, &cif_descr->cif,
+                                      invoke_callback, infotuple, closure_exec);
+    }
+    else
 #endif
+    {
+#if defined(__APPLE__) && defined(FFI_AVAILABLE_APPLE) && !FFI_LEGACY_CLOSURE_API
+        PyErr_Format(PyExc_SystemError, "ffi_prep_closure_loc() is missing");
+        goto error;
+#else
+        status = ffi_prep_closure(closure, &cif_descr->cif,
+                                  invoke_callback, infotuple);
+#endif
+    }
+
+    if (status != FFI_OK) {
         PyErr_SetString(PyExc_SystemError,
                         "libffi failed to build this callback");
         goto error;
     }
+
     if (closure->user_data != infotuple) {
         /* Issue #266.  Should not occur, but could, if we are using
            at runtime a version of libffi compiled with a different
@@ -6060,22 +6430,30 @@
             "different from the 'ffi.h' file seen at compile-time)");
         goto error;
     }
+    PyObject_GC_Track(cd);
     return (PyObject *)cd;
 
  error:
     closure->user_data = NULL;
     if (cd == NULL) {
-#ifdef CFFI_TRUST_LIBFFI
-        ffi_closure_free(closure);
-#else
-        cffi_closure_free(closure);
+#if CFFI_CHECK_FFI_CLOSURE_ALLOC_MAYBE
+        if (CFFI_CHECK_FFI_CLOSURE_ALLOC) {
+            ffi_closure_free(closure);
+        }
+        else
 #endif
+            cffi_closure_free(closure);
     }
     else
         Py_DECREF(cd);
     Py_XDECREF(infotuple);
     return NULL;
 }
+#if defined(__clang__)
+#  pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#  pragma GCC diagnostic pop
+#endif
 
 static PyObject *b_new_enum_type(PyObject *self, PyObject *args)
 {
@@ -6697,7 +7075,7 @@
                                                       &CDataOwningGC_Type);
     if (cd == NULL)
         return NULL;
-    Py_INCREF(ct_voidp);
+    Py_INCREF(ct_voidp);        /* must be "void *" */
     cd->head.c_type = ct_voidp;
     cd->head.c_data = (char *)cd;
     cd->head.c_weakreflist = NULL;
@@ -6815,10 +7193,11 @@
 {
     CDataObject *cd;
     Py_buffer *view;
-    Py_ssize_t arraylength;
+    Py_ssize_t arraylength, minimumlength = 0;
 
-    if (!(ct->ct_flags & CT_ARRAY)) {
-        PyErr_Format(PyExc_TypeError, "expected an array ctype, got '%s'",
+    if (!(ct->ct_flags & (CT_ARRAY | CT_POINTER))) {
+        PyErr_Format(PyExc_TypeError,
+                     "expected a pointer or array ctype, got '%s'",
                      ct->ct_name);
         return NULL;
     }
@@ -6841,43 +7220,51 @@
     if (_my_PyObject_GetContiguousBuffer(x, view, require_writable) < 0)
         goto error1;
 
-    if (ct->ct_length >= 0) {
-        /* it's an array with a fixed length; make sure that the
-           buffer contains enough bytes. */
-        if (view->len < ct->ct_size) {
-            PyErr_Format(PyExc_ValueError,
-                "buffer is too small (%zd bytes) for '%s' (%zd bytes)",
-                view->len, ct->ct_name, ct->ct_size);
-            goto error2;
-        }
-        arraylength = ct->ct_length;
+    if (ct->ct_flags & CT_POINTER)
+    {
+        arraylength = view->len;   /* number of bytes, not used so far */
     }
     else {
-        /* it's an open 'array[]' */
-        if (ct->ct_itemdescr->ct_size == 1) {
-            /* fast path, performance only */
-            arraylength = view->len;
-        }
-        else if (ct->ct_itemdescr->ct_size > 0) {
-            /* give it as many items as fit the buffer.  Ignore a
-               partial last element. */
-            arraylength = view->len / ct->ct_itemdescr->ct_size;
+        /* ct->ct_flags & CT_ARRAY */
+        if (ct->ct_length >= 0) {
+            /* it's an array with a fixed length; make sure that the
+               buffer contains enough bytes. */
+            minimumlength = ct->ct_size;
+            arraylength = ct->ct_length;
         }
         else {
-            /* it's an array 'empty[]'.  Unsupported obscure case:
-               the problem is that setting the length of the result
-               to anything large (like SSIZE_T_MAX) is dangerous,
-               because if someone tries to loop over it, it will
-               turn effectively into an infinite loop. */
-            PyErr_Format(PyExc_ZeroDivisionError,
-                "from_buffer('%s', ..): the actual length of the array "
-                "cannot be computed", ct->ct_name);
-            goto error2;
+            /* it's an open 'array[]' */
+            if (ct->ct_itemdescr->ct_size == 1) {
+                /* fast path, performance only */
+                arraylength = view->len;
+            }
+            else if (ct->ct_itemdescr->ct_size > 0) {
+                /* give it as many items as fit the buffer.  Ignore a
+                   partial last element. */
+                arraylength = view->len / ct->ct_itemdescr->ct_size;
+            }
+            else {
+                /* it's an array 'empty[]'.  Unsupported obscure case:
+                   the problem is that setting the length of the result
+                   to anything large (like SSIZE_T_MAX) is dangerous,
+                   because if someone tries to loop over it, it will
+                   turn effectively into an infinite loop. */
+                PyErr_Format(PyExc_ZeroDivisionError,
+                    "from_buffer('%s', ..): the actual length of the array "
+                    "cannot be computed", ct->ct_name);
+                goto error2;
+            }
         }
     }
+    if (view->len < minimumlength) {
+        PyErr_Format(PyExc_ValueError,
+            "buffer is too small (%zd bytes) for '%s' (%zd bytes)",
+            view->len, ct->ct_name, minimumlength);
+        goto error2;
+    }
 
-    cd = (CDataObject *)PyObject_GC_New(CDataObject_owngc_frombuf,
-                                        &CDataOwningGC_Type);
+    cd = (CDataObject *)PyObject_GC_New(CDataObject_frombuf,
+                                        &CDataFromBuf_Type);
     if (cd == NULL)
         goto error2;
 
@@ -6885,8 +7272,8 @@
     cd->c_type = ct;
     cd->c_data = view->buf;
     cd->c_weakreflist = NULL;
-    ((CDataObject_owngc_frombuf *)cd)->length = arraylength;
-    ((CDataObject_owngc_frombuf *)cd)->bufferview = view;
+    ((CDataObject_frombuf *)cd)->length = arraylength;
+    ((CDataObject_frombuf *)cd)->bufferview = view;
     PyObject_GC_Track(cd);
     return (PyObject *)cd;
 
@@ -7576,6 +7963,22 @@
     PyObject *m, *v;
     int i;
     static char init_done = 0;
+    static PyTypeObject *all_types[] = {
+        &dl_type,
+        &CTypeDescr_Type,
+        &CField_Type,
+        &CData_Type,
+        &CDataOwning_Type,
+        &CDataOwningGC_Type,
+        &CDataFromBuf_Type,
+        &CDataGCP_Type,
+        &CDataIter_Type,
+        &MiniBuffer_Type,
+        &FFI_Type,
+        &Lib_Type,
+        &GlobSupport_Type,
+        NULL
+    };
 
     v = PySys_GetObject("version");
     if (v == NULL || !PyText_Check(v) ||
@@ -7601,24 +8004,22 @@
             INITERROR;
     }
 
-    if (PyType_Ready(&dl_type) < 0)
-        INITERROR;
-    if (PyType_Ready(&CTypeDescr_Type) < 0)
-        INITERROR;
-    if (PyType_Ready(&CField_Type) < 0)
-        INITERROR;
-    if (PyType_Ready(&CData_Type) < 0)
-        INITERROR;
-    if (PyType_Ready(&CDataOwning_Type) < 0)
-        INITERROR;
-    if (PyType_Ready(&CDataOwningGC_Type) < 0)
-        INITERROR;
-    if (PyType_Ready(&CDataGCP_Type) < 0)
-        INITERROR;
-    if (PyType_Ready(&CDataIter_Type) < 0)
-        INITERROR;
-    if (PyType_Ready(&MiniBuffer_Type) < 0)
-        INITERROR;
+    /* readify all types and add them to the module */
+    for (i = 0; all_types[i] != NULL; i++) {
+        PyTypeObject *tp = all_types[i];
+        PyObject *tpo = (PyObject *)tp;
+        if (strncmp(tp->tp_name, "_cffi_backend.", 14) != 0) {
+            PyErr_Format(PyExc_ImportError,
+                         "'%s' is an ill-formed type name", tp->tp_name);
+            INITERROR;
+        }
+        if (PyType_Ready(tp) < 0)
+            INITERROR;
+
+        Py_INCREF(tpo);
+        if (PyModule_AddObject(m, tp->tp_name + 14, tpo) < 0)
+            INITERROR;
+    }
 
     if (!init_done) {
         v = PyText_FromString("_cffi_backend");
@@ -7664,10 +8065,6 @@
             INITERROR;
     }
 
-    Py_INCREF(&MiniBuffer_Type);
-    if (PyModule_AddObject(m, "buffer", (PyObject *)&MiniBuffer_Type) < 0)
-        INITERROR;
-
     init_cffi_tls();
     if (PyErr_Occurred())
         INITERROR;
diff --git a/c/_cffi_backend.so b/c/_cffi_backend.so
old mode 100644
new mode 100755
index 7055ae6..31f7528
--- a/c/_cffi_backend.so
+++ b/c/_cffi_backend.so
Binary files differ
diff --git a/c/call_python.c b/c/call_python.c
index 8fdcb90..d3d2e17 100644
--- a/c/call_python.c
+++ b/c/call_python.c
@@ -1,10 +1,18 @@
 #if PY_VERSION_HEX >= 0x03080000
-# define Py_BUILD_CORE
-/* for access to the fields of PyInterpreterState */
-#  include "internal/pycore_pystate.h"
-# undef Py_BUILD_CORE
+# define HAVE_PYINTERPSTATE_GETDICT
 #endif
 
+
+static PyObject *_current_interp_key(void)
+{
+    PyInterpreterState *interp = PyThreadState_GET()->interp;
+#ifdef HAVE_PYINTERPSTATE_GETDICT
+    return PyInterpreterState_GetDict(interp);   /* shared reference */
+#else
+    return interp->modules;
+#endif
+}
+
 static PyObject *_get_interpstate_dict(void)
 {
     /* Hack around to return a dict that is subinterpreter-local.
@@ -14,8 +22,9 @@
     */
     static PyObject *attr_name = NULL;
     PyThreadState *tstate;
-    PyObject *d, *builtins;
+    PyObject *d, *interpdict;
     int err;
+    PyInterpreterState *interp;
 
     tstate = PyThreadState_GET();
     if (tstate == NULL) {
@@ -23,8 +32,13 @@
         return NULL;
     }
 
-    builtins = tstate->interp->builtins;
-    if (builtins == NULL) {
+    interp = tstate->interp;
+#ifdef HAVE_PYINTERPSTATE_GETDICT
+    interpdict = PyInterpreterState_GetDict(interp);   /* shared reference */
+#else
+    interpdict = interp->builtins;
+#endif
+    if (interpdict == NULL) {
         /* subinterpreter was cleared already, or is being cleared right now,
            to a point that is too much for us to continue */
         return NULL;
@@ -38,13 +52,13 @@
             goto error;
     }
 
-    d = PyDict_GetItem(builtins, attr_name);
+    d = PyDict_GetItem(interpdict, attr_name);
     if (d == NULL) {
         d = PyDict_New();
         if (d == NULL)
             goto error;
-        err = PyDict_SetItem(builtins, attr_name, d);
-        Py_DECREF(d);    /* if successful, there is one ref left in builtins */
+        err = PyDict_SetItem(interpdict, attr_name, d);
+        Py_DECREF(d);   /* if successful, there is one ref left in interpdict */
         if (err < 0)
             goto error;
     }
@@ -163,7 +177,7 @@
     if (infotuple == NULL)
         return 3;    /* no ffi.def_extern() from this subinterpreter */
 
-    new1 = PyThreadState_GET()->interp->modules;
+    new1 = _current_interp_key();
     Py_INCREF(new1);
     Py_INCREF(infotuple);
     old1 = (PyObject *)externpy->reserved1;
@@ -252,7 +266,7 @@
     }
     else {
         PyGILState_STATE state = gil_ensure();
-        if (externpy->reserved1 != PyThreadState_GET()->interp->modules) {
+        if (externpy->reserved1 != _current_interp_key()) {
             /* Update the (reserved1, reserved2) cache.  This will fail
                if we didn't call @ffi.def_extern() in this particular
                subinterpreter. */
diff --git a/c/cdlopen.c b/c/cdlopen.c
index ad33bbd..0ed319b 100644
--- a/c/cdlopen.c
+++ b/c/cdlopen.c
@@ -43,12 +43,13 @@
     const char *modname;
     PyObject *temp, *result = NULL;
     void *handle;
+    int auto_close;
 
-    handle = b_do_dlopen(args, &modname, &temp);
+    handle = b_do_dlopen(args, &modname, &temp, &auto_close);
     if (handle != NULL)
     {
         result = (PyObject *)lib_internal_new((FFIObject *)self,
-                                              modname, handle);
+                                              modname, handle, auto_close);
     }
     Py_XDECREF(temp);
     return result;
diff --git a/c/cffi1_module.c b/c/cffi1_module.c
index 2b98e8e..06a84fe 100644
--- a/c/cffi1_module.c
+++ b/c/cffi1_module.c
@@ -26,11 +26,6 @@
     int i, res;
     static char init_done = 0;
 
-    if (PyType_Ready(&FFI_Type) < 0)
-        return -1;
-    if (PyType_Ready(&Lib_Type) < 0)
-        return -1;
-
     if (!init_done) {
         if (init_global_types_dict(FFI_Type.tp_dict) < 0)
             return -1;
@@ -62,16 +57,6 @@
         }
         init_done = 1;
     }
-
-    x = (PyObject *)&FFI_Type;
-    Py_INCREF(x);
-    if (PyModule_AddObject(m, "FFI", x) < 0)
-        return -1;
-    x = (PyObject *)&Lib_Type;
-    Py_INCREF(x);
-    if (PyModule_AddObject(m, "Lib", x) < 0)
-        return -1;
-
     return 0;
 }
 
@@ -199,7 +184,7 @@
     if (ffi == NULL || PyModule_AddObject(m, "ffi", (PyObject *)ffi) < 0)
         return NULL;
 
-    lib = lib_internal_new(ffi, module_name, NULL);
+    lib = lib_internal_new(ffi, module_name, NULL, 0);
     if (lib == NULL || PyModule_AddObject(m, "lib", (PyObject *)lib) < 0)
         return NULL;
 
diff --git a/c/cglob.c b/c/cglob.c
index 9ee4025..e97767c 100644
--- a/c/cglob.c
+++ b/c/cglob.c
@@ -20,7 +20,7 @@
 
 static PyTypeObject GlobSupport_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "FFIGlobSupport",
+    "_cffi_backend.__FFIGlobSupport",
     sizeof(GlobSupportObject),
     0,
     (destructor)glob_support_dealloc,           /* tp_dealloc */
diff --git a/c/ffi_obj.c b/c/ffi_obj.c
index 1e8cc6f..f154146 100644
--- a/c/ffi_obj.c
+++ b/c/ffi_obj.c
@@ -1070,10 +1070,10 @@
         if (res != NULL) {
             tup = PyTuple_Pack(2, Py_True, res);
             if (tup == NULL || PyDict_SetItem(cache, tag, tup) < 0) {
-                Py_XDECREF(tup);
                 Py_DECREF(res);
                 res = NULL;
             }
+            Py_XDECREF(tup);
         }
     }
 
@@ -1137,7 +1137,7 @@
 
 static PyTypeObject FFI_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "CompiledFFI",
+    "_cffi_backend.FFI",
     sizeof(FFIObject),
     0,
     (destructor)ffi_dealloc,                    /* tp_dealloc */
diff --git a/c/lib_obj.c b/c/lib_obj.c
index 7cd40ec..38bf3d5 100644
--- a/c/lib_obj.c
+++ b/c/lib_obj.c
@@ -29,6 +29,7 @@
     PyObject *l_libname;        /* some string that gives the name of the lib */
     FFIObject *l_ffi;           /* reference back to the ffi object */
     void *l_libhandle;          /* the dlopen()ed handle, if any */
+    int l_auto_close;           /* if we must dlclose() this handle */
 };
 
 static struct CPyExtFunc_s *_cpyextfunc_get(PyObject *x)
@@ -91,7 +92,8 @@
 static void lib_dealloc(LibObject *lib)
 {
     PyObject_GC_UnTrack(lib);
-    cdlopen_close_ignore_errors(lib->l_libhandle);
+    if (lib->l_auto_close)
+        cdlopen_close_ignore_errors(lib->l_libhandle);
     Py_DECREF(lib->l_dict);
     Py_DECREF(lib->l_libname);
     Py_DECREF(lib->l_ffi);
@@ -587,7 +589,7 @@
 
 static PyTypeObject Lib_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "CompiledLib",
+    "_cffi_backend.Lib",
     sizeof(LibObject),
     0,
     (destructor)lib_dealloc,                    /* tp_dealloc */
@@ -624,7 +626,7 @@
 };
 
 static LibObject *lib_internal_new(FFIObject *ffi, const char *module_name,
-                                   void *dlopen_libhandle)
+                                   void *dlopen_libhandle, int auto_close)
 {
     LibObject *lib;
     PyObject *libname, *dict;
@@ -647,6 +649,7 @@
     Py_INCREF(ffi);
     lib->l_ffi = ffi;
     lib->l_libhandle = dlopen_libhandle;
+    lib->l_auto_close = auto_close;
     return lib;
 
  err3:
@@ -654,7 +657,8 @@
  err2:
     Py_DECREF(libname);
  err1:
-    cdlopen_close_ignore_errors(dlopen_libhandle);
+    if (auto_close)
+        cdlopen_close_ignore_errors(dlopen_libhandle);
     return NULL;
 }
 
diff --git a/c/libffi_arm64/README b/c/libffi_arm64/README
new file mode 100644
index 0000000..3b8f133
--- /dev/null
+++ b/c/libffi_arm64/README
@@ -0,0 +1,5 @@
+The libffi package for ARM64 is copied from the CPython binary dependencies repository:
+
+https://github.com/python/cpython-bin-deps/archive/libffi.zip
+
+The library file has been renamed from libffi-7.lib to ffi.lib to avoid special-casing.
\ No newline at end of file
diff --git a/c/libffi_arm64/ffi.lib b/c/libffi_arm64/ffi.lib
new file mode 100644
index 0000000..4a8b84b
--- /dev/null
+++ b/c/libffi_arm64/ffi.lib
Binary files differ
diff --git a/c/libffi_arm64/include/ffi.h b/c/libffi_arm64/include/ffi.h
new file mode 100644
index 0000000..d91c3e1
--- /dev/null
+++ b/c/libffi_arm64/include/ffi.h
@@ -0,0 +1,515 @@
+/* -----------------------------------------------------------------*-C-*-
+   libffi 3.3-rc0 - Copyright (c) 2011, 2014 Anthony Green
+                    - Copyright (c) 1996-2003, 2007, 2008 Red Hat, Inc.
+
+   Permission is hereby granted, free of charge, to any person
+   obtaining a copy of this software and associated documentation
+   files (the ``Software''), to deal in the Software without
+   restriction, including without limitation the rights to use, copy,
+   modify, merge, publish, distribute, sublicense, and/or sell copies
+   of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+
+   ----------------------------------------------------------------------- */
+
+/* -------------------------------------------------------------------
+   Most of the API is documented in doc/libffi.texi.
+
+   The raw API is designed to bypass some of the argument packing and
+   unpacking on architectures for which it can be avoided.  Routines
+   are provided to emulate the raw API if the underlying platform
+   doesn't allow faster implementation.
+
+   More details on the raw API can be found in:
+
+   http://gcc.gnu.org/ml/java/1999-q3/msg00138.html
+
+   and
+
+   http://gcc.gnu.org/ml/java/1999-q3/msg00174.html
+   -------------------------------------------------------------------- */
+
+#ifndef LIBFFI_H
+#define LIBFFI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Specify which architecture libffi is configured for. */
+#ifndef ARM_WIN64
+#define ARM_WIN64
+#endif
+
+/* ---- System configuration information --------------------------------- */
+
+#include <ffitarget.h>
+
+#ifndef LIBFFI_ASM
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define __attribute__(X)
+#endif
+
+#include <stddef.h>
+#include <limits.h>
+
+/* LONG_LONG_MAX is not always defined (not if STRICT_ANSI, for example).
+   But we can find it either under the correct ANSI name, or under GNU
+   C's internal name.  */
+
+#define FFI_64_BIT_MAX 9223372036854775807
+
+#ifdef LONG_LONG_MAX
+# define FFI_LONG_LONG_MAX LONG_LONG_MAX
+#else
+# ifdef LLONG_MAX
+#  define FFI_LONG_LONG_MAX LLONG_MAX
+#  ifdef _AIX52 /* or newer has C99 LLONG_MAX */
+#   undef FFI_64_BIT_MAX
+#   define FFI_64_BIT_MAX 9223372036854775807LL
+#  endif /* _AIX52 or newer */
+# else
+#  ifdef __GNUC__
+#   define FFI_LONG_LONG_MAX __LONG_LONG_MAX__
+#  endif
+#  ifdef _AIX /* AIX 5.1 and earlier have LONGLONG_MAX */
+#   ifndef __PPC64__
+#    if defined (__IBMC__) || defined (__IBMCPP__)
+#     define FFI_LONG_LONG_MAX LONGLONG_MAX
+#    endif
+#   endif /* __PPC64__ */
+#   undef  FFI_64_BIT_MAX
+#   define FFI_64_BIT_MAX 9223372036854775807LL
+#  endif
+# endif
+#endif
+
+/* The closure code assumes that this works on pointers, i.e. a size_t
+   can hold a pointer.  */
+
+typedef struct _ffi_type
+{
+  size_t size;
+  unsigned short alignment;
+  unsigned short type;
+  struct _ffi_type **elements;
+} ffi_type;
+
+/* Need minimal decorations for DLLs to work on Windows.  GCC has
+   autoimport and autoexport.  Always mark externally visible symbols
+   as dllimport for MSVC clients, even if it means an extra indirection
+   when using the static version of the library.
+   Besides, as a workaround, they can define FFI_BUILDING if they
+   *know* they are going to link with the static library.  */
+#if defined _MSC_VER
+# if defined FFI_BUILDING_DLL /* Building libffi.DLL with msvcc.sh */
+#  define FFI_API __declspec(dllexport)
+# elif !defined FFI_BUILDING  /* Importing libffi.DLL */
+#  define FFI_API __declspec(dllimport)
+# else                        /* Building/linking static library */
+#  define FFI_API
+# endif
+#else
+# define FFI_API
+#endif
+
+/* The externally visible type declarations also need the MSVC DLL
+   decorations, or they will not be exported from the object file.  */
+#if defined LIBFFI_HIDE_BASIC_TYPES
+# define FFI_EXTERN FFI_API
+#else
+# define FFI_EXTERN extern FFI_API
+#endif
+
+#ifndef LIBFFI_HIDE_BASIC_TYPES
+#if SCHAR_MAX == 127
+# define ffi_type_uchar                ffi_type_uint8
+# define ffi_type_schar                ffi_type_sint8
+#else
+ #error "char size not supported"
+#endif
+
+#if SHRT_MAX == 32767
+# define ffi_type_ushort       ffi_type_uint16
+# define ffi_type_sshort       ffi_type_sint16
+#elif SHRT_MAX == 2147483647
+# define ffi_type_ushort       ffi_type_uint32
+# define ffi_type_sshort       ffi_type_sint32
+#else
+ #error "short size not supported"
+#endif
+
+#if INT_MAX == 32767
+# define ffi_type_uint         ffi_type_uint16
+# define ffi_type_sint         ffi_type_sint16
+#elif INT_MAX == 2147483647
+# define ffi_type_uint         ffi_type_uint32
+# define ffi_type_sint         ffi_type_sint32
+#elif INT_MAX == 9223372036854775807
+# define ffi_type_uint         ffi_type_uint64
+# define ffi_type_sint         ffi_type_sint64
+#else
+ #error "int size not supported"
+#endif
+
+#if LONG_MAX == 2147483647
+# if FFI_LONG_LONG_MAX != FFI_64_BIT_MAX
+ #error "no 64-bit data type supported"
+# endif
+#elif LONG_MAX != FFI_64_BIT_MAX
+ #error "long size not supported"
+#endif
+
+#if LONG_MAX == 2147483647
+# define ffi_type_ulong        ffi_type_uint32
+# define ffi_type_slong        ffi_type_sint32
+#elif LONG_MAX == FFI_64_BIT_MAX
+# define ffi_type_ulong        ffi_type_uint64
+# define ffi_type_slong        ffi_type_sint64
+#else
+ #error "long size not supported"
+#endif
+
+/* These are defined in types.c.  */
+FFI_EXTERN ffi_type ffi_type_void;
+FFI_EXTERN ffi_type ffi_type_uint8;
+FFI_EXTERN ffi_type ffi_type_sint8;
+FFI_EXTERN ffi_type ffi_type_uint16;
+FFI_EXTERN ffi_type ffi_type_sint16;
+FFI_EXTERN ffi_type ffi_type_uint32;
+FFI_EXTERN ffi_type ffi_type_sint32;
+FFI_EXTERN ffi_type ffi_type_uint64;
+FFI_EXTERN ffi_type ffi_type_sint64;
+FFI_EXTERN ffi_type ffi_type_float;
+FFI_EXTERN ffi_type ffi_type_double;
+FFI_EXTERN ffi_type ffi_type_pointer;
+
+#if 0
+FFI_EXTERN ffi_type ffi_type_longdouble;
+#else
+#define ffi_type_longdouble ffi_type_double
+#endif
+
+#ifdef FFI_TARGET_HAS_COMPLEX_TYPE
+FFI_EXTERN ffi_type ffi_type_complex_float;
+FFI_EXTERN ffi_type ffi_type_complex_double;
+#if 0
+FFI_EXTERN ffi_type ffi_type_complex_longdouble;
+#else
+#define ffi_type_complex_longdouble ffi_type_complex_double
+#endif
+#endif
+#endif /* LIBFFI_HIDE_BASIC_TYPES */
+
+typedef enum {
+  FFI_OK = 0,
+  FFI_BAD_TYPEDEF,
+  FFI_BAD_ABI
+} ffi_status;
+
+typedef struct {
+  ffi_abi abi;
+  unsigned nargs;
+  ffi_type **arg_types;
+  ffi_type *rtype;
+  unsigned bytes;
+  unsigned flags;
+#ifdef FFI_EXTRA_CIF_FIELDS
+  FFI_EXTRA_CIF_FIELDS;
+#endif
+} ffi_cif;
+
+/* ---- Definitions for the raw API -------------------------------------- */
+
+#ifndef FFI_SIZEOF_ARG
+# if LONG_MAX == 2147483647
+#  define FFI_SIZEOF_ARG        4
+# elif LONG_MAX == FFI_64_BIT_MAX
+#  define FFI_SIZEOF_ARG        8
+# endif
+#endif
+
+#ifndef FFI_SIZEOF_JAVA_RAW
+#  define FFI_SIZEOF_JAVA_RAW FFI_SIZEOF_ARG
+#endif
+
+typedef union {
+  ffi_sarg  sint;
+  ffi_arg   uint;
+  float	    flt;
+  char      data[FFI_SIZEOF_ARG];
+  void*     ptr;
+} ffi_raw;
+
+#if FFI_SIZEOF_JAVA_RAW == 4 && FFI_SIZEOF_ARG == 8
+/* This is a special case for mips64/n32 ABI (and perhaps others) where
+   sizeof(void *) is 4 and FFI_SIZEOF_ARG is 8.  */
+typedef union {
+  signed int	sint;
+  unsigned int	uint;
+  float		flt;
+  char		data[FFI_SIZEOF_JAVA_RAW];
+  void*		ptr;
+} ffi_java_raw;
+#else
+typedef ffi_raw ffi_java_raw;
+#endif
+
+
+FFI_API 
+void ffi_raw_call (ffi_cif *cif,
+		   void (*fn)(void),
+		   void *rvalue,
+		   ffi_raw *avalue);
+
+FFI_API void ffi_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw);
+FFI_API void ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args);
+FFI_API size_t ffi_raw_size (ffi_cif *cif);
+
+/* This is analogous to the raw API, except it uses Java parameter
+   packing, even on 64-bit machines.  I.e. on 64-bit machines longs
+   and doubles are followed by an empty 64-bit word.  */
+
+#if !FFI_NATIVE_RAW_API
+FFI_API
+void ffi_java_raw_call (ffi_cif *cif,
+			void (*fn)(void),
+			void *rvalue,
+			ffi_java_raw *avalue);
+#endif
+
+FFI_API
+void ffi_java_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_java_raw *raw);
+FFI_API
+void ffi_java_raw_to_ptrarray (ffi_cif *cif, ffi_java_raw *raw, void **args);
+FFI_API
+size_t ffi_java_raw_size (ffi_cif *cif);
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#if FFI_CLOSURES
+
+#ifdef _MSC_VER
+__declspec(align(8))
+#endif
+typedef struct {
+#if 0
+  void *trampoline_table;
+  void *trampoline_table_entry;
+#else
+  char tramp[FFI_TRAMPOLINE_SIZE];
+#endif
+  ffi_cif   *cif;
+  void     (*fun)(ffi_cif*,void*,void**,void*);
+  void      *user_data;
+} ffi_closure
+#ifdef __GNUC__
+    __attribute__((aligned (8)))
+#endif
+    ;
+
+#ifndef __GNUC__
+# ifdef __sgi
+#  pragma pack 0
+# endif
+#endif
+
+FFI_API void *ffi_closure_alloc (size_t size, void **code);
+FFI_API void ffi_closure_free (void *);
+
+FFI_API ffi_status
+ffi_prep_closure (ffi_closure*,
+		  ffi_cif *,
+		  void (*fun)(ffi_cif*,void*,void**,void*),
+		  void *user_data)
+#if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 405)
+  __attribute__((deprecated ("use ffi_prep_closure_loc instead")))
+#elif defined(__GNUC__) && __GNUC__ >= 3
+  __attribute__((deprecated))
+#endif
+  ;
+
+FFI_API ffi_status
+ffi_prep_closure_loc (ffi_closure*,
+		      ffi_cif *,
+		      void (*fun)(ffi_cif*,void*,void**,void*),
+		      void *user_data,
+		      void*codeloc);
+
+#ifdef __sgi
+# pragma pack 8
+#endif
+typedef struct {
+#if 0
+  void *trampoline_table;
+  void *trampoline_table_entry;
+#else
+  char tramp[FFI_TRAMPOLINE_SIZE];
+#endif
+  ffi_cif   *cif;
+
+#if !FFI_NATIVE_RAW_API
+
+  /* If this is enabled, then a raw closure has the same layout 
+     as a regular closure.  We use this to install an intermediate 
+     handler to do the translation, void** -> ffi_raw*.  */
+
+  void     (*translate_args)(ffi_cif*,void*,void**,void*);
+  void      *this_closure;
+
+#endif
+
+  void     (*fun)(ffi_cif*,void*,ffi_raw*,void*);
+  void      *user_data;
+
+} ffi_raw_closure;
+
+typedef struct {
+#if 0
+  void *trampoline_table;
+  void *trampoline_table_entry;
+#else
+  char tramp[FFI_TRAMPOLINE_SIZE];
+#endif
+
+  ffi_cif   *cif;
+
+#if !FFI_NATIVE_RAW_API
+
+  /* If this is enabled, then a raw closure has the same layout 
+     as a regular closure.  We use this to install an intermediate 
+     handler to do the translation, void** -> ffi_raw*.  */
+
+  void     (*translate_args)(ffi_cif*,void*,void**,void*);
+  void      *this_closure;
+
+#endif
+
+  void     (*fun)(ffi_cif*,void*,ffi_java_raw*,void*);
+  void      *user_data;
+
+} ffi_java_raw_closure;
+
+FFI_API ffi_status
+ffi_prep_raw_closure (ffi_raw_closure*,
+		      ffi_cif *cif,
+		      void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+		      void *user_data);
+
+FFI_API ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure*,
+			  ffi_cif *cif,
+			  void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+			  void *user_data,
+			  void *codeloc);
+
+#if !FFI_NATIVE_RAW_API
+FFI_API ffi_status
+ffi_prep_java_raw_closure (ffi_java_raw_closure*,
+		           ffi_cif *cif,
+		           void (*fun)(ffi_cif*,void*,ffi_java_raw*,void*),
+		           void *user_data);
+
+FFI_API ffi_status
+ffi_prep_java_raw_closure_loc (ffi_java_raw_closure*,
+			       ffi_cif *cif,
+			       void (*fun)(ffi_cif*,void*,ffi_java_raw*,void*),
+			       void *user_data,
+			       void *codeloc);
+#endif
+
+#endif /* FFI_CLOSURES */
+
+#if FFI_GO_CLOSURES
+
+typedef struct {
+  void      *tramp;
+  ffi_cif   *cif;
+  void     (*fun)(ffi_cif*,void*,void**,void*);
+} ffi_go_closure;
+
+FFI_API ffi_status ffi_prep_go_closure (ffi_go_closure*, ffi_cif *,
+				void (*fun)(ffi_cif*,void*,void**,void*));
+
+FFI_API void ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+		  void **avalue, void *closure);
+
+#endif /* FFI_GO_CLOSURES */
+
+/* ---- Public interface definition -------------------------------------- */
+
+FFI_API 
+ffi_status ffi_prep_cif(ffi_cif *cif,
+			ffi_abi abi,
+			unsigned int nargs,
+			ffi_type *rtype,
+			ffi_type **atypes);
+
+FFI_API
+ffi_status ffi_prep_cif_var(ffi_cif *cif,
+			    ffi_abi abi,
+			    unsigned int nfixedargs,
+			    unsigned int ntotalargs,
+			    ffi_type *rtype,
+			    ffi_type **atypes);
+
+FFI_API
+void ffi_call(ffi_cif *cif,
+	      void (*fn)(void),
+	      void *rvalue,
+	      void **avalue);
+
+FFI_API
+ffi_status ffi_get_struct_offsets (ffi_abi abi, ffi_type *struct_type,
+				   size_t *offsets);
+
+/* Useful for eliminating compiler warnings.  */
+#define FFI_FN(f) ((void (*)(void))f)
+
+/* ---- Definitions shared with assembly code ---------------------------- */
+
+#endif
+
+/* If these change, update src/mips/ffitarget.h. */
+#define FFI_TYPE_VOID       0    
+#define FFI_TYPE_INT        1
+#define FFI_TYPE_FLOAT      2    
+#define FFI_TYPE_DOUBLE     3
+#if 0
+#define FFI_TYPE_LONGDOUBLE 4
+#else
+#define FFI_TYPE_LONGDOUBLE FFI_TYPE_DOUBLE
+#endif
+#define FFI_TYPE_UINT8      5   
+#define FFI_TYPE_SINT8      6
+#define FFI_TYPE_UINT16     7 
+#define FFI_TYPE_SINT16     8
+#define FFI_TYPE_UINT32     9
+#define FFI_TYPE_SINT32     10
+#define FFI_TYPE_UINT64     11
+#define FFI_TYPE_SINT64     12
+#define FFI_TYPE_STRUCT     13
+#define FFI_TYPE_POINTER    14
+#define FFI_TYPE_COMPLEX    15
+
+/* This should always refer to the last type code (for sanity checks).  */
+#define FFI_TYPE_LAST       FFI_TYPE_COMPLEX
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/c/libffi_arm64/include/fficonfig.h b/c/libffi_arm64/include/fficonfig.h
new file mode 100644
index 0000000..5768c29
--- /dev/null
+++ b/c/libffi_arm64/include/fficonfig.h
@@ -0,0 +1,215 @@
+/* fficonfig.h.  Generated from fficonfig.h.in by configure.  */
+/* fficonfig.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Define if building universal (internal helper macro) */
+/* #undef AC_APPLE_UNIVERSAL_BUILD */
+
+/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
+   systems. This function is required for `alloca.c' support on those systems.
+   */
+/* #undef CRAY_STACKSEG_END */
+
+/* Define to 1 if using `alloca.c'. */
+/* #undef C_ALLOCA */
+
+/* Define to the flags needed for the .section .eh_frame directive. */
+/* #undef EH_FRAME_FLAGS */
+
+/* Define this if you want extra debugging. */
+/* #undef FFI_DEBUG */
+
+/* Cannot use PROT_EXEC on this target, so, we revert to alternative means */
+/* #undef FFI_EXEC_TRAMPOLINE_TABLE */
+
+/* Define this if you want to enable pax emulated trampolines */
+/* #undef FFI_MMAP_EXEC_EMUTRAMP_PAX */
+
+/* Cannot use malloc on this target, so, we revert to alternative means */
+/* #undef FFI_MMAP_EXEC_WRIT */
+
+/* Define this if you do not want support for the raw API. */
+/* #undef FFI_NO_RAW_API */
+
+/* Define this if you do not want support for aggregate types. */
+/* #undef FFI_NO_STRUCTS */
+
+/* Define to 1 if you have `alloca', as a function or macro. */
+#define HAVE_ALLOCA 1
+
+/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
+   */
+/* #undef HAVE_ALLOCA_H */
+
+/* Define if your assembler supports .cfi_* directives. */
+/* #undef HAVE_AS_CFI_PSEUDO_OP */
+
+/* Define if your assembler supports .register. */
+/* #undef HAVE_AS_REGISTER_PSEUDO_OP */
+
+/* Define if the compiler uses zarch features. */
+/* #undef HAVE_AS_S390_ZARCH */
+
+/* Define if your assembler and linker support unaligned PC relative relocs.
+   */
+/* #undef HAVE_AS_SPARC_UA_PCREL */
+
+/* Define if your assembler supports unwind section type. */
+/* #undef HAVE_AS_X86_64_UNWIND_SECTION_TYPE */
+
+/* Define if your assembler supports PC relative relocs. */
+/* #undef HAVE_AS_X86_PCREL */
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+/* #undef HAVE_DLFCN_H */
+
+/* Define if __attribute__((visibility("hidden"))) is supported. */
+/* #undef HAVE_HIDDEN_VISIBILITY_ATTRIBUTE */
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define if you have the long double type and it is bigger than a double */
+/* #undef HAVE_LONG_DOUBLE */
+
+/* Define if you support more than one size of the long double type */
+/* #undef HAVE_LONG_DOUBLE_VARIANT */
+
+/* Define to 1 if you have the `memcpy' function. */
+/* #undef HAVE_MEMCPY */
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the `mkostemp' function. */
+/* #undef HAVE_MKOSTEMP */
+
+/* Define to 1 if you have the `mmap' function. */
+/* #undef HAVE_MMAP */
+
+/* Define if mmap with MAP_ANON(YMOUS) works. */
+/* #undef HAVE_MMAP_ANON */
+
+/* Define if mmap of /dev/zero works. */
+/* #undef HAVE_MMAP_DEV_ZERO */
+
+/* Define if read-only mmap of a plain file works. */
+/* #undef HAVE_MMAP_FILE */
+
+/* Define if .eh_frame sections should be read-only. */
+/* #undef HAVE_RO_EH_FRAME */
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+/* #undef HAVE_STRINGS_H */
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/mman.h> header file. */
+/* #undef HAVE_SYS_MMAN_H */
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+/* #undef HAVE_UNISTD_H */
+
+/* Define to 1 if GNU symbol versioning is used for libffi. */
+/* #undef LIBFFI_GNU_SYMBOL_VERSIONING */
+
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
+#define LT_OBJDIR ".libs/"
+
+/* Name of package */
+#define PACKAGE "libffi"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "http://github.com/libffi/libffi/issues"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "libffi"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "libffi 3.3-rc0"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "libffi"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "3.3-rc0"
+
+/* The size of `double', as computed by sizeof. */
+#define SIZEOF_DOUBLE 8
+
+/* The size of `long double', as computed by sizeof. */
+#define SIZEOF_LONG_DOUBLE 8
+
+/* The size of `size_t', as computed by sizeof. */
+#define SIZEOF_SIZE_T 8
+
+/* If using the C implementation of alloca, define if you know the
+   direction of stack growth for your system; otherwise it will be
+   automatically deduced at runtime.
+	STACK_DIRECTION > 0 => grows toward higher addresses
+	STACK_DIRECTION < 0 => grows toward lower addresses
+	STACK_DIRECTION = 0 => direction of growth unknown */
+/* #undef STACK_DIRECTION */
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Define if symbols are underscored. */
+/* #undef SYMBOL_UNDERSCORE */
+
+/* Define this if you are using Purify and want to suppress spurious messages.
+   */
+/* #undef USING_PURIFY */
+
+/* Version number of package */
+#define VERSION "3.3-rc0"
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+/* #  undef WORDS_BIGENDIAN */
+# endif
+#endif
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+/* #undef size_t */
+
+
+#ifdef HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
+#ifdef LIBFFI_ASM
+#ifdef __APPLE__
+#define FFI_HIDDEN(name) .private_extern name
+#else
+#define FFI_HIDDEN(name) .hidden name
+#endif
+#else
+#define FFI_HIDDEN __attribute__ ((visibility ("hidden")))
+#endif
+#else
+#ifdef LIBFFI_ASM
+#define FFI_HIDDEN(name)
+#else
+#define FFI_HIDDEN
+#endif
+#endif
+
diff --git a/c/libffi_arm64/include/ffitarget.h b/c/libffi_arm64/include/ffitarget.h
new file mode 100644
index 0000000..ecb6d2d
--- /dev/null
+++ b/c/libffi_arm64/include/ffitarget.h
@@ -0,0 +1,92 @@
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source.  Use ffi.h instead."
+#endif
+
+#ifndef LIBFFI_ASM
+#ifdef __ILP32__
+#define FFI_SIZEOF_ARG 8
+#define FFI_SIZEOF_JAVA_RAW  4
+typedef unsigned long long ffi_arg;
+typedef signed long long ffi_sarg;
+#elif defined(_M_ARM64)
+#define FFI_SIZEOF_ARG 8
+typedef unsigned long long ffi_arg;
+typedef signed long long ffi_sarg;
+#else
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+#endif
+
+typedef enum ffi_abi
+  {
+    FFI_FIRST_ABI = 0,
+    FFI_SYSV,
+    FFI_LAST_ABI,
+    FFI_DEFAULT_ABI = FFI_SYSV
+  } ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_NATIVE_RAW_API 0
+
+#if defined (FFI_EXEC_TRAMPOLINE_TABLE) && FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#define FFI_TRAMPOLINE_SIZE 16
+#define FFI_TRAMPOLINE_CLOSURE_OFFSET 16
+#else
+#error "No trampoline table implementation"
+#endif
+
+#else
+#define FFI_TRAMPOLINE_SIZE 24
+#define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE
+#endif
+
+#ifdef _M_ARM64
+#define FFI_EXTRA_CIF_FIELDS unsigned is_variadic
+#endif
+
+/* ---- Internal ---- */
+
+#if defined (__APPLE__)
+#define FFI_TARGET_SPECIFIC_VARIADIC
+#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_nfixedargs
+#elif !defined(_M_ARM64)
+/* iOS and Windows reserve x18 for the system.  Disable Go closures until
+   a new static chain is chosen.  */
+#define FFI_GO_CLOSURES 1
+#endif
+
+#ifndef _M_ARM64
+/* No complex type on Windows */
+#define FFI_TARGET_HAS_COMPLEX_TYPE
+#endif
+
+#endif
diff --git a/c/libffi_msvc/LICENSE b/c/libffi_x86_x64/LICENSE
similarity index 100%
rename from c/libffi_msvc/LICENSE
rename to c/libffi_x86_x64/LICENSE
diff --git a/c/libffi_msvc/README b/c/libffi_x86_x64/README
similarity index 100%
rename from c/libffi_msvc/README
rename to c/libffi_x86_x64/README
diff --git a/c/libffi_msvc/README.ctypes b/c/libffi_x86_x64/README.ctypes
similarity index 100%
rename from c/libffi_msvc/README.ctypes
rename to c/libffi_x86_x64/README.ctypes
diff --git a/c/libffi_msvc/ffi.c b/c/libffi_x86_x64/ffi.c
similarity index 93%
rename from c/libffi_msvc/ffi.c
rename to c/libffi_x86_x64/ffi.c
index 836f171..b9e324f 100644
--- a/c/libffi_msvc/ffi.c
+++ b/c/libffi_x86_x64/ffi.c
@@ -103,7 +103,7 @@
 	    }
 	}
 #ifdef _WIN64
-      else if (z > 8)
+      else if (z != 1 && z != 2 && z != 4 && z != 8)
         {
           /* On Win64, if a single argument takes more than 8 bytes,
              then it is always passed by reference. */
@@ -144,9 +144,11 @@
       /* MSVC returns small structures in registers.  Put in cif->flags
          the value FFI_TYPE_STRUCT only if the structure is big enough;
          otherwise, put the 4- or 8-bytes integer type. */
-      if (cif->rtype->size <= 4)
+      if (cif->rtype->size == 1 ||
+          cif->rtype->size == 2 ||
+          cif->rtype->size == 4)
         cif->flags = FFI_TYPE_INT;
-      else if (cif->rtype->size <= 8)
+      else if (cif->rtype->size == 8)
         cif->flags = FFI_TYPE_SINT64;
       else
         cif->flags = FFI_TYPE_STRUCT;
@@ -287,16 +289,12 @@
 	    _asm fld DWORD PTR [eax] ;
 //      asm ("flds (%0)" : : "r" (resp) : "st" );
     }
-  else if (rtype == FFI_TYPE_DOUBLE)
+  else if (rtype == FFI_TYPE_DOUBLE || rtype == FFI_TYPE_LONGDOUBLE)
     {
 	    _asm mov eax, resp ;
 	    _asm fld QWORD PTR [eax] ;
 //      asm ("fldl (%0)" : : "r" (resp) : "st", "st(1)" );
     }
-  else if (rtype == FFI_TYPE_LONGDOUBLE)
-    {
-//      asm ("fldt (%0)" : : "r" (resp) : "st", "st(1)" );
-    }
   else if (rtype == FFI_TYPE_SINT64)
     {
 	    _asm mov edx, resp ;
@@ -307,6 +305,10 @@
 //	   : : "r"(resp)
 //	   : "eax", "edx");
     }
+  else if (rtype == FFI_TYPE_STRUCT)
+    {
+	    _asm mov eax, resp ;
+    }
 #else
   /* now, do a generic return based on the value of rtype */
   if (rtype == FFI_TYPE_INT)
@@ -317,14 +319,10 @@
     {
       asm ("flds (%0)" : : "r" (resp) : "st" );
     }
-  else if (rtype == FFI_TYPE_DOUBLE)
+  else if (rtype == FFI_TYPE_DOUBLE || rtype == FFI_TYPE_LONGDOUBLE)
     {
       asm ("fldl (%0)" : : "r" (resp) : "st", "st(1)" );
     }
-  else if (rtype == FFI_TYPE_LONGDOUBLE)
-    {
-      asm ("fldt (%0)" : : "r" (resp) : "st", "st(1)" );
-    }
   else if (rtype == FFI_TYPE_SINT64)
     {
       asm ("movl 0(%0),%%eax;"
@@ -332,6 +330,10 @@
 	   : : "r"(resp)
 	   : "eax", "edx");
     }
+  else if (rtype == FFI_TYPE_STRUCT)
+    {
+      asm ("movl %0,%%eax" : : "r" (resp) : "eax");
+    }
 #endif
 #endif
 
@@ -340,6 +342,8 @@
      result types except for floats; we have to 'mov xmm0, rax' in the
      caller to correct this.
   */
+  if (rtype == FFI_TYPE_STRUCT)
+      return resp;
   return *(void **)resp;
 #endif
 }
@@ -378,7 +382,7 @@
       /* because we're little endian, this is what it turns into.   */
 
 #ifdef _WIN64
-      if (z > 8)
+      if (z != 1 && z != 2 && z != 4 && z != 8)
         {
           /* On Win64, if a single argument takes more than 8 bytes,
              then it is always passed by reference. */
@@ -447,6 +451,11 @@
        || cif->arg_types[3]->type == FFI_TYPE_DOUBLE))
     mask |= 8;
 
+  /* if we return a non-small struct, then the first argument is a pointer
+   * to the return area, and all real arguments are shifted by one */
+  if (cif->flags == FFI_TYPE_STRUCT)
+    mask = (mask & ~8) << 1;
+
   /* 41 BB ----         mov         r11d,mask */
   BYTES("\x41\xBB"); INT(mask);
 
diff --git a/c/libffi_msvc/ffi.h b/c/libffi_x86_x64/ffi.h
similarity index 100%
rename from c/libffi_msvc/ffi.h
rename to c/libffi_x86_x64/ffi.h
diff --git a/c/libffi_msvc/ffi_common.h b/c/libffi_x86_x64/ffi_common.h
similarity index 100%
rename from c/libffi_msvc/ffi_common.h
rename to c/libffi_x86_x64/ffi_common.h
diff --git a/c/libffi_msvc/fficonfig.h b/c/libffi_x86_x64/fficonfig.h
similarity index 100%
rename from c/libffi_msvc/fficonfig.h
rename to c/libffi_x86_x64/fficonfig.h
diff --git a/c/libffi_msvc/ffitarget.h b/c/libffi_x86_x64/ffitarget.h
similarity index 100%
rename from c/libffi_msvc/ffitarget.h
rename to c/libffi_x86_x64/ffitarget.h
diff --git a/c/libffi_msvc/prep_cif.c b/c/libffi_x86_x64/prep_cif.c
similarity index 96%
rename from c/libffi_msvc/prep_cif.c
rename to c/libffi_x86_x64/prep_cif.c
index 5dacfff..df94a98 100644
--- a/c/libffi_msvc/prep_cif.c
+++ b/c/libffi_x86_x64/prep_cif.c
@@ -117,7 +117,10 @@
   /* Make space for the return structure pointer */
   if (cif->rtype->type == FFI_TYPE_STRUCT
 #ifdef _WIN32
-      && (cif->rtype->size > 8)  /* MSVC returns small structs in registers */
+      && (cif->rtype->size != 1)  /* MSVC returns small structs in registers */
+      && (cif->rtype->size != 2)
+      && (cif->rtype->size != 4)
+      && (cif->rtype->size != 8)
 #endif
 #ifdef SPARC
       && (cif->abi != FFI_V9 || cif->rtype->size > 32)
diff --git a/c/libffi_msvc/types.c b/c/libffi_x86_x64/types.c
similarity index 100%
rename from c/libffi_msvc/types.c
rename to c/libffi_x86_x64/types.c
diff --git a/c/libffi_msvc/win32.c b/c/libffi_x86_x64/win32.c
similarity index 100%
rename from c/libffi_msvc/win32.c
rename to c/libffi_x86_x64/win32.c
diff --git a/c/libffi_msvc/win64.asm b/c/libffi_x86_x64/win64.asm
similarity index 100%
rename from c/libffi_msvc/win64.asm
rename to c/libffi_x86_x64/win64.asm
diff --git a/c/libffi_msvc/win64.obj b/c/libffi_x86_x64/win64.obj
similarity index 100%
rename from c/libffi_msvc/win64.obj
rename to c/libffi_x86_x64/win64.obj
Binary files differ
diff --git a/c/misc_win32.h b/c/misc_win32.h
index 07b76c1..156cf5d 100644
--- a/c/misc_win32.h
+++ b/c/misc_win32.h
@@ -124,8 +124,10 @@
             s_buf[--len] = L'\0';
         message = PyUnicode_FromWideChar(s_buf, len);
     }
-    if (message != NULL)
+    if (message != NULL) {
         v = Py_BuildValue("(iO)", err, message);
+        Py_DECREF(message);
+    }
     else
         v = NULL;
     LocalFree(s_buf);
@@ -168,7 +170,6 @@
         /* Only seen this in out of mem situations */
         sprintf(s_small_buf, "Windows Error 0x%X", err);
         s = s_small_buf;
-        s_buf = NULL;
     } else {
         s = s_buf;
         /* remove trailing cr/lf and dots */
diff --git a/c/realize_c_type.c b/c/realize_c_type.c
index 082c488..82629b7 100644
--- a/c/realize_c_type.c
+++ b/c/realize_c_type.c
@@ -413,19 +413,12 @@
 }
 
 static PyObject *
-realize_c_type_or_func(builder_c_t *builder,
-                        _cffi_opcode_t opcodes[], int index)
+realize_c_type_or_func_now(builder_c_t *builder, _cffi_opcode_t op,
+                           _cffi_opcode_t opcodes[], int index)
 {
     PyObject *x, *y, *z;
-    _cffi_opcode_t op = opcodes[index];
     Py_ssize_t length = -1;
 
-    if ((((uintptr_t)op) & 1) == 0) {
-        x = (PyObject *)op;
-        Py_INCREF(x);
-        return x;
-    }
-
     switch (_CFFI_GETOP(op)) {
 
     case _CFFI_OP_PRIMITIVE:
@@ -643,6 +636,36 @@
         return NULL;
     }
 
+    return x;
+}
+
+static int _realize_recursion_level;
+
+static PyObject *
+realize_c_type_or_func(builder_c_t *builder,
+                        _cffi_opcode_t opcodes[], int index)
+{
+    PyObject *x;
+     _cffi_opcode_t op = opcodes[index];
+
+    if ((((uintptr_t)op) & 1) == 0) {
+        x = (PyObject *)op;
+        Py_INCREF(x);
+        return x;
+    }
+
+    if (_realize_recursion_level >= 1000) {
+        PyErr_Format(PyExc_RuntimeError,
+            "type-building recursion too deep or infinite.  "
+            "This is known to occur e.g. in ``struct s { void(*callable)"
+            "(struct s); }''.  Please report if you get this error and "
+            "really need support for your case.");
+        return NULL;
+    }
+    _realize_recursion_level++;
+    x = realize_c_type_or_func_now(builder, op, opcodes, index);
+    _realize_recursion_level--;
+
     if (x != NULL && opcodes == builder->ctx.types && opcodes[index] != x) {
         assert((((uintptr_t)x) & 1) == 0);
         assert((((uintptr_t)opcodes[index]) & 1) == 1);
@@ -650,7 +673,7 @@
         opcodes[index] = x;
     }
     return x;
-};
+}
 
 static CTypeDescrObject *
 realize_c_func_return_type(builder_c_t *builder,
diff --git a/c/test_c.py b/c/test_c.py
index da5f751..654584d 100644
--- a/c/test_c.py
+++ b/c/test_c.py
@@ -1,18 +1,23 @@
 import py
+import pytest
+
 def _setup_path():
     import os, sys
-    if '__pypy__' in sys.builtin_module_names:
-        py.test.skip("_cffi_backend.c: not tested on top of pypy, "
-                     "use pypy/module/_cffi_backend/test/ instead.")
     sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
 _setup_path()
 from _cffi_backend import *
-from _cffi_backend import _testfunc, _get_types, _get_common_types, __version__
+from _cffi_backend import _get_types, _get_common_types
+try:
+    from _cffi_backend import _testfunc
+except ImportError:
+    def _testfunc(num):    # stand-in when _testfunc cannot be imported: skips the calling test
+        pytest.skip("_testfunc() not available")
+from _cffi_backend import __version__
 
 # ____________________________________________________________
 
 import sys
-assert __version__ == "1.12.2", ("This test_c.py file is for testing a version"
+assert __version__ == "1.15.0", ("This test_c.py file is for testing a version"
                                  " of cffi that differs from the one that we"
                                  " get from 'import _cffi_backend'")
 if sys.version_info < (3,):
@@ -63,8 +68,10 @@
         path = ctypes.util.find_library(name)
         if path is None and name == 'c':
             assert sys.platform == 'win32'
-            assert sys.version_info >= (3,)
-            py.test.skip("dlopen(None) cannot work on Windows with Python 3")
+            assert (sys.version_info >= (3,) or
+                    '__pypy__' in sys.builtin_module_names)
+            py.test.skip("dlopen(None) cannot work on Windows "
+                         "with PyPy or Python 3")
     return load_library(path, flags)
 
 def test_load_library():
@@ -107,7 +114,7 @@
     p = new_primitive_type("signed char")
     x = cast(p, -65 + 17*256)
     assert repr(x) == "<cdata 'signed char' -65>"
-    assert repr(type(x)) == "<%s '_cffi_backend.CData'>" % type_or_class
+    assert repr(type(x)) == "<%s '_cffi_backend._CDataBase'>" % type_or_class
     assert int(x) == -65
     x = cast(p, -66 + (1<<199)*256)
     assert repr(x) == "<cdata 'signed char' -66>"
@@ -315,8 +322,10 @@
     assert p[0] == 0
     p = newp(BPtr, 5000)
     assert p[0] == 5000
-    py.test.raises(IndexError, "p[1]")
-    py.test.raises(IndexError, "p[-1]")
+    with pytest.raises(IndexError):
+        p[1]
+    with pytest.raises(IndexError):
+        p[-1]
 
 def test_reading_pointer_to_float():
     BFloat = new_primitive_type("float")
@@ -444,7 +453,8 @@
 def test_invalid_indexing():
     p = new_primitive_type("int")
     x = cast(p, 42)
-    py.test.raises(TypeError, "x[0]")
+    with pytest.raises(TypeError):
+        x[0]
 
 def test_default_str():
     BChar = new_primitive_type("char")
@@ -537,13 +547,16 @@
     assert len(a) == LENGTH
     for i in range(LENGTH):
         assert a[i] == 0
-    py.test.raises(IndexError, "a[LENGTH]")
-    py.test.raises(IndexError, "a[-1]")
+    with pytest.raises(IndexError):
+        a[LENGTH]
+    with pytest.raises(IndexError):
+        a[-1]
     for i in range(LENGTH):
         a[i] = i * i + 1
     for i in range(LENGTH):
         assert a[i] == i * i + 1
-    e = py.test.raises(IndexError, "a[LENGTH+100] = 500")
+    with pytest.raises(IndexError) as e:
+        a[LENGTH+100] = 500
     assert ('(expected %d < %d)' % (LENGTH+100, LENGTH)) in str(e.value)
     py.test.raises(TypeError, int, a)
 
@@ -558,10 +571,14 @@
         a[i] -= i
     for i in range(42):
         assert a[i] == -i
-    py.test.raises(IndexError, "a[42]")
-    py.test.raises(IndexError, "a[-1]")
-    py.test.raises(IndexError, "a[42] = 123")
-    py.test.raises(IndexError, "a[-1] = 456")
+    with pytest.raises(IndexError):
+        a[42]
+    with pytest.raises(IndexError):
+        a[-1]
+    with pytest.raises(IndexError):
+        a[42] = 123
+    with pytest.raises(IndexError):
+        a[-1] = 456
 
 def test_array_of_unknown_length_instance_with_initializer():
     p = new_primitive_type("int")
@@ -609,10 +626,14 @@
     assert a == (p - 1)
     BPtr = new_pointer_type(new_primitive_type("short"))
     q = newp(BPtr, None)
-    py.test.raises(TypeError, "p - q")
-    py.test.raises(TypeError, "q - p")
-    py.test.raises(TypeError, "a - q")
-    e = py.test.raises(TypeError, "q - a")
+    with pytest.raises(TypeError):
+        p - q
+    with pytest.raises(TypeError):
+        q - p
+    with pytest.raises(TypeError):
+        a - q
+    with pytest.raises(TypeError) as e:
+        q - a
     assert str(e.value) == "cannot subtract cdata 'short *' and cdata 'int *'"
 
 def test_ptr_sub_unaligned():
@@ -625,8 +646,10 @@
             assert b - a == (bi - 1240) // size_of_int()
             assert a - b == (1240 - bi) // size_of_int()
         else:
-            py.test.raises(ValueError, "b - a")
-            py.test.raises(ValueError, "a - b")
+            with pytest.raises(ValueError):
+                b - a
+            with pytest.raises(ValueError):
+                a - b
 
 def test_cast_primitive_from_cdata():
     p = new_primitive_type("int")
@@ -777,10 +800,12 @@
     BStruct = new_struct_type("struct foo")
     BStructPtr = new_pointer_type(BStruct)
     p = cast(BStructPtr, 42)
-    e = py.test.raises(AttributeError, "p.a1")    # opaque
+    with pytest.raises(AttributeError) as e:
+        p.a1    # opaque
     assert str(e.value) == ("cdata 'struct foo *' points to an opaque type: "
                             "cannot read fields")
-    e = py.test.raises(AttributeError, "p.a1 = 10")    # opaque
+    with pytest.raises(AttributeError) as e:
+        p.a1 = 10    # opaque
     assert str(e.value) == ("cdata 'struct foo *' points to an opaque type: "
                             "cannot write fields")
 
@@ -792,30 +817,41 @@
     s.a2 = 123
     assert s.a1 == 0
     assert s.a2 == 123
-    py.test.raises(OverflowError, "s.a1 = sys.maxsize+1")
+    with pytest.raises(OverflowError):
+        s.a1 = sys.maxsize+1
     assert s.a1 == 0
-    e = py.test.raises(AttributeError, "p.foobar")
+    with pytest.raises(AttributeError) as e:
+        p.foobar
     assert str(e.value) == "cdata 'struct foo *' has no field 'foobar'"
-    e = py.test.raises(AttributeError, "p.foobar = 42")
+    with pytest.raises(AttributeError) as e:
+        p.foobar = 42
     assert str(e.value) == "cdata 'struct foo *' has no field 'foobar'"
-    e = py.test.raises(AttributeError, "s.foobar")
+    with pytest.raises(AttributeError) as e:
+        s.foobar
     assert str(e.value) == "cdata 'struct foo' has no field 'foobar'"
-    e = py.test.raises(AttributeError, "s.foobar = 42")
+    with pytest.raises(AttributeError) as e:
+        s.foobar = 42
     assert str(e.value) == "cdata 'struct foo' has no field 'foobar'"
     j = cast(BInt, 42)
-    e = py.test.raises(AttributeError, "j.foobar")
+    with pytest.raises(AttributeError) as e:
+        j.foobar
     assert str(e.value) == "cdata 'int' has no attribute 'foobar'"
-    e = py.test.raises(AttributeError, "j.foobar = 42")
+    with pytest.raises(AttributeError) as e:
+        j.foobar = 42
     assert str(e.value) == "cdata 'int' has no attribute 'foobar'"
     j = cast(new_pointer_type(BInt), 42)
-    e = py.test.raises(AttributeError, "j.foobar")
+    with pytest.raises(AttributeError) as e:
+        j.foobar
     assert str(e.value) == "cdata 'int *' has no attribute 'foobar'"
-    e = py.test.raises(AttributeError, "j.foobar = 42")
+    with pytest.raises(AttributeError) as e:
+        j.foobar = 42
     assert str(e.value) == "cdata 'int *' has no attribute 'foobar'"
     pp = newp(new_pointer_type(BStructPtr), p)
-    e = py.test.raises(AttributeError, "pp.a1")
+    with pytest.raises(AttributeError) as e:
+        pp.a1
     assert str(e.value) == "cdata 'struct foo * *' has no attribute 'a1'"
-    e = py.test.raises(AttributeError, "pp.a1 = 42")
+    with pytest.raises(AttributeError) as e:
+        pp.a1 = 42
     assert str(e.value) == "cdata 'struct foo * *' has no attribute 'a1'"
 
 def test_union_instance():
@@ -1295,7 +1331,9 @@
     except ImportError:
         import io as cStringIO    # Python 3
     import linecache
-    def matches(istr, ipattern):
+    def matches(istr, ipattern, ipattern38):
+        if sys.version_info >= (3, 8):
+            ipattern = ipattern38
         str, pattern = istr, ipattern
         while '$' in pattern:
             i = pattern.index('$')
@@ -1328,6 +1366,8 @@
     try:
         linecache.getline = lambda *args: 'LINE'    # hack: speed up PyPy tests
         sys.stderr = cStringIO.StringIO()
+        if hasattr(sys, '__unraisablehook__'):          # work around pytest
+            sys.unraisablehook = sys.__unraisablehook__ # on recent CPythons
         assert f(100) == 300
         assert sys.stderr.getvalue() == ''
         assert f(10000) == -42
@@ -1339,6 +1379,14 @@
   File "$", line $, in check_value
     $
 ValueError: 42
+""", """\
+Exception ignored from cffi callback <function$Zcb1 at 0x$>:
+Traceback (most recent call last):
+  File "$", line $, in Zcb1
+    $
+  File "$", line $, in check_value
+    $
+ValueError: 42
 """)
         sys.stderr = cStringIO.StringIO()
         bigvalue = 20000
@@ -1347,6 +1395,12 @@
 From cffi callback <function$Zcb1 at 0x$>:
 Trying to convert the result back to C:
 OverflowError: integer 60000 does not fit 'short'
+""", """\
+Exception ignored from cffi callback <function$Zcb1 at 0x$>, trying to convert the result back to C:
+Traceback (most recent call last):
+  File "$", line $, in test_callback_exception
+    $
+OverflowError: integer 60000 does not fit 'short'
 """)
         sys.stderr = cStringIO.StringIO()
         bigvalue = 20000
@@ -1384,11 +1438,24 @@
 During the call to 'onerror', another exception occurred:
 
 TypeError: $integer$
+""", """\
+Exception ignored from cffi callback <function$Zcb1 at 0x$>, trying to convert the result back to C:
+Traceback (most recent call last):
+  File "$", line $, in test_callback_exception
+    $
+OverflowError: integer 60000 does not fit 'short'
+Exception ignored during handling of the above exception by 'onerror':
+Traceback (most recent call last):
+  File "$", line $, in test_callback_exception
+    $
+TypeError: $integer$
 """)
         #
         sys.stderr = cStringIO.StringIO()
         seen = "not a list"    # this makes the oops() function crash
         assert ff(bigvalue) == -42
+        # the $ after the AttributeError message are for the suggestions that
+        # will be added in Python 3.10
         assert matches(sys.stderr.getvalue(), """\
 From cffi callback <function$Zcb1 at 0x$>:
 Trying to convert the result back to C:
@@ -1399,7 +1466,18 @@
 Traceback (most recent call last):
   File "$", line $, in oops
     $
-AttributeError: 'str' object has no attribute 'append'
+AttributeError: 'str' object has no attribute 'append$
+""", """\
+Exception ignored from cffi callback <function$Zcb1 at 0x$>, trying to convert the result back to C:
+Traceback (most recent call last):
+  File "$", line $, in test_callback_exception
+    $
+OverflowError: integer 60000 does not fit 'short'
+Exception ignored during handling of the above exception by 'onerror':
+Traceback (most recent call last):
+  File "$", line $, in oops
+    $
+AttributeError: 'str' object has no attribute 'append$
 """)
     finally:
         sys.stderr = orig_stderr
@@ -1434,7 +1512,7 @@
     def make_callback(m):
         def cb(n):
             return n + m
-        return callback(BFunc, cb, 42)    # 'cb' and 'BFunc' go out of scope
+        return callback(BFunc, cb, 42)    # 'cb' goes out of scope
     #
     flist = [make_callback(i) for i in range(BIGNUM)]
     for i, f in enumerate(flist):
@@ -1636,7 +1714,8 @@
     assert ("an integer is required" in msg or  # CPython
             "unsupported operand type for int(): 'NoneType'" in msg or  # old PyPys
             "expected integer, got NoneType object" in msg) # newer PyPys
-    py.test.raises(TypeError, 'p.a1 = "def"')
+    with pytest.raises(TypeError):
+        p.a1 = "def"
     if sys.version_info < (3,):
         BEnum2 = new_enum_type(unicode("foo"), (unicode('abc'),), (5,), BInt)
         assert string(cast(BEnum2, 5)) == 'abc'
@@ -1766,14 +1845,17 @@
     p.a1 = -1
     assert p.a1 == -1
     p.a1 = 0
-    py.test.raises(OverflowError, "p.a1 = 2")
+    with pytest.raises(OverflowError):
+        p.a1 = 2
     assert p.a1 == 0
     #
     p.a1 = -1
     p.a2 = 3
     p.a3 = -4
-    py.test.raises(OverflowError, "p.a3 = 4")
-    e = py.test.raises(OverflowError, "p.a3 = -5")
+    with pytest.raises(OverflowError):
+        p.a3 = 4
+    with pytest.raises(OverflowError) as e:
+        p.a3 = -5
     assert str(e.value) == ("value -5 outside the range allowed by the "
                             "bit field width: -4 <= x <= 3")
     assert p.a1 == -1 and p.a2 == 3 and p.a3 == -4
@@ -1782,7 +1864,8 @@
     # allows also setting the value "1" (it still gets read back as -1)
     p.a1 = 1
     assert p.a1 == -1
-    e = py.test.raises(OverflowError, "p.a1 = -2")
+    with pytest.raises(OverflowError) as e:
+        p.a1 = -2
     assert str(e.value) == ("value -2 outside the range allowed by the "
                             "bit field width: -1 <= x <= 1")
 
@@ -1842,14 +1925,17 @@
     assert string(a[2]) == b"."
     a[2] = b"12345"
     assert string(a[2]) == b"12345"
-    e = py.test.raises(IndexError, 'a[2] = b"123456"')
+    with pytest.raises(IndexError) as e:
+        a[2] = b"123456"
     assert 'char[5]' in str(e.value)
     assert 'got 6 characters' in str(e.value)
 
 def test_add_error():
     x = cast(new_primitive_type("int"), 42)
-    py.test.raises(TypeError, "x + 1")
-    py.test.raises(TypeError, "x - 1")
+    with pytest.raises(TypeError):
+        x + 1
+    with pytest.raises(TypeError):
+        x - 1
 
 def test_void_errors():
     py.test.raises(ValueError, alignof, new_void_type())
@@ -2181,8 +2267,10 @@
     s = newp(BStructPtr)
     s.a1 = u+'\x00'
     assert s.a1 == u+'\x00'
-    py.test.raises(TypeError, "s.a1 = b'a'")
-    py.test.raises(TypeError, "s.a1 = bytechr(0xFF)")
+    with pytest.raises(TypeError):
+        s.a1 = b'a'
+    with pytest.raises(TypeError):
+        s.a1 = bytechr(0xFF)
     s.a1 = u+'\u1234'
     assert s.a1 == u+'\u1234'
     if pyuni4:
@@ -2196,7 +2284,8 @@
             s.a1 = u+'\ud807\udf44'
             assert s.a1 == u+'\U00011f44'
     else:
-        py.test.raises(TypeError, "s.a1 = u+'\U00012345'")
+        with pytest.raises(TypeError):
+            s.a1 = u+'\U00012345'
     #
     BWCharArray = new_array_type(BWCharP, None)
     a = newp(BWCharArray, u+'hello \u1234 world')
@@ -2220,7 +2309,8 @@
         assert list(a) == expected
         got = [a[i] for i in range(4)]
         assert got == expected
-        py.test.raises(IndexError, 'a[4]')
+        with pytest.raises(IndexError):
+            a[4]
     #
     w = cast(BWChar, 'a')
     assert repr(w) == "<cdata '%s' %s'a'>" % (typename, mandatory_u_prefix)
@@ -2352,9 +2442,11 @@
 def test_cannot_dereference_void():
     BVoidP = new_pointer_type(new_void_type())
     p = cast(BVoidP, 123456)
-    py.test.raises(TypeError, "p[0]")
+    with pytest.raises(TypeError):
+        p[0]
     p = cast(BVoidP, 0)
-    py.test.raises((TypeError, RuntimeError), "p[0]")
+    with pytest.raises((TypeError, RuntimeError)):
+        p[0]
 
 def test_iter():
     BInt = new_primitive_type("int")
@@ -2377,12 +2469,12 @@
     assert (q == p) is False
     assert (q != p) is True
     if strict_compare:
-        py.test.raises(TypeError, "p < q")
-        py.test.raises(TypeError, "p <= q")
-        py.test.raises(TypeError, "q < p")
-        py.test.raises(TypeError, "q <= p")
-        py.test.raises(TypeError, "p > q")
-        py.test.raises(TypeError, "p >= q")
+        with pytest.raises(TypeError): p < q
+        with pytest.raises(TypeError): p <= q
+        with pytest.raises(TypeError): q < p
+        with pytest.raises(TypeError): q <= p
+        with pytest.raises(TypeError): p > q
+        with pytest.raises(TypeError): p >= q
     r = cast(BVoidP, p)
     assert (p <  r) is False
     assert (p <= r) is True
@@ -2428,7 +2520,8 @@
         try:
             expected = b"hi there\x00"[i]
         except IndexError:
-            py.test.raises(IndexError, "buf[i]")
+            with pytest.raises(IndexError):
+                buf[i]
         else:
             assert buf[i] == bitem2bchr(expected)
     # --mb_slice--
@@ -2455,15 +2548,18 @@
         try:
             expected[i] = bytechr(i & 0xff)
         except IndexError:
-            py.test.raises(IndexError, "buf[i] = bytechr(i & 0xff)")
+            with pytest.raises(IndexError):
+                buf[i] = bytechr(i & 0xff)
         else:
             buf[i] = bytechr(i & 0xff)
         assert list(buf) == expected
     # --mb_ass_slice--
     buf[:] = b"hi there\x00"
     assert list(buf) == list(c) == list(map(bitem2bchr, b"hi there\x00"))
-    py.test.raises(ValueError, 'buf[:] = b"shorter"')
-    py.test.raises(ValueError, 'buf[:] = b"this is much too long!"')
+    with pytest.raises(ValueError):
+        buf[:] = b"shorter"
+    with pytest.raises(ValueError):
+        buf[:] = b"this is much too long!"
     buf[4:2] = b""   # no effect, but should work
     assert buf[:] == b"hi there\x00"
     buf[:2] = b"HI"
@@ -2499,8 +2595,8 @@
     assert get_errno() == 95
 
 def test_errno_callback():
-    if globals().get('PY_DOT_PY') == '2.5':
-        py.test.skip("cannot run this test on py.py with Python 2.5")
+    if globals().get('PY_DOT_PY'):
+        py.test.skip("cannot run this test on py.py (e.g. fails on Windows)")
     set_errno(95)
     def cb():
         e = get_errno()
@@ -2537,14 +2633,16 @@
     BChar = new_primitive_type("char")
     BCharP = new_pointer_type(BChar)
     x = newp(BCharP)
-    py.test.raises(TypeError, "del x[0]")
+    with pytest.raises(TypeError):
+        del x[0]
 
 def test_bug_delattr():
     BLong = new_primitive_type("long")
     BStruct = new_struct_type("struct foo")
     complete_struct_or_union(BStruct, [('a1', BLong, -1)])
     x = newp(new_pointer_type(BStruct))
-    py.test.raises(AttributeError, "del x.a1")
+    with pytest.raises(AttributeError):
+        del x.a1
 
 def test_variable_length_struct():
     py.test.skip("later")
@@ -2562,7 +2660,8 @@
     assert sizeof(x) == 6 * size_of_long()
     x[4] = 123
     assert x[4] == 123
-    py.test.raises(IndexError, "x[5]")
+    with pytest.raises(IndexError):
+        x[5]
     assert len(x.a2) == 5
     #
     py.test.raises(TypeError, newp, BStructP, [123])
@@ -2814,7 +2913,8 @@
     BCharP = new_pointer_type(new_primitive_type("char"))
     p = newp(BCharP, b'X')
     q = cast(BBoolP, p)
-    py.test.raises(ValueError, "q[0]")
+    with pytest.raises(ValueError):
+        q[0]
     py.test.raises(TypeError, newp, BBoolP, b'\x00')
     assert newp(BBoolP, 0)[0] is False
     assert newp(BBoolP, 1)[0] is True
@@ -3114,8 +3214,10 @@
     assert c[1] == 123
     assert c[3] == 456
     assert d[2] == 456
-    py.test.raises(IndexError, "d[3]")
-    py.test.raises(IndexError, "d[-1]")
+    with pytest.raises(IndexError):
+        d[3]
+    with pytest.raises(IndexError):
+        d[-1]
 
 def test_slice_ptr():
     BIntP = new_pointer_type(new_primitive_type("int"))
@@ -3133,7 +3235,8 @@
     c = newp(BIntArray, 5)
     c[0:5]
     assert len(c[5:5]) == 0
-    py.test.raises(IndexError, "c[-1:1]")
+    with pytest.raises(IndexError):
+        c[-1:1]
     cp = c + 0
     cp[-1:1]
 
@@ -3141,17 +3244,23 @@
     BIntP = new_pointer_type(new_primitive_type("int"))
     BIntArray = new_array_type(BIntP, None)
     c = newp(BIntArray, 5)
-    e = py.test.raises(IndexError, "c[:5]")
+    with pytest.raises(IndexError) as e:
+        c[:5]
     assert str(e.value) == "slice start must be specified"
-    e = py.test.raises(IndexError, "c[4:]")
+    with pytest.raises(IndexError) as e:
+        c[4:]
     assert str(e.value) == "slice stop must be specified"
-    e = py.test.raises(IndexError, "c[1:2:3]")
+    with pytest.raises(IndexError) as e:
+        c[1:2:3]
     assert str(e.value) == "slice with step not supported"
-    e = py.test.raises(IndexError, "c[1:2:1]")
+    with pytest.raises(IndexError) as e:
+        c[1:2:1]
     assert str(e.value) == "slice with step not supported"
-    e = py.test.raises(IndexError, "c[4:2]")
+    with pytest.raises(IndexError) as e:
+        c[4:2]
     assert str(e.value) == "slice start > stop"
-    e = py.test.raises(IndexError, "c[6:6]")
+    with pytest.raises(IndexError) as e:
+        c[6:6]
     assert str(e.value) == "index too large (expected 6 <= 5)"
 
 def test_setslice():
@@ -3165,9 +3274,11 @@
     assert list(c) == [0, 100, 300, 400, 0]
     cp[-1:1] = iter([500, 600])
     assert list(c) == [0, 100, 500, 600, 0]
-    py.test.raises(ValueError, "cp[-1:1] = [1000]")
+    with pytest.raises(ValueError):
+        cp[-1:1] = [1000]
     assert list(c) == [0, 100, 1000, 600, 0]
-    py.test.raises(ValueError, "cp[-1:1] = (700, 800, 900)")
+    with pytest.raises(ValueError):
+        cp[-1:1] = (700, 800, 900)
     assert list(c) == [0, 100, 700, 800, 0]
 
 def test_setslice_array():
@@ -3427,10 +3538,14 @@
     assert sizeof(q[0]) == sizeof(BStruct)
     #
     # error cases
-    py.test.raises(IndexError, "p.y[4]")
-    py.test.raises(TypeError, "p.y = cast(BIntP, 0)")
-    py.test.raises(TypeError, "p.y = 15")
-    py.test.raises(TypeError, "p.y = None")
+    with pytest.raises(IndexError):
+        p.y[4]
+    with pytest.raises(TypeError):
+        p.y = cast(BIntP, 0)
+    with pytest.raises(TypeError):
+        p.y = 15
+    with pytest.raises(TypeError):
+        p.y = None
     #
     # accepting this may be specified by the C99 standard,
     # or a GCC strangeness...
@@ -3452,6 +3567,15 @@
     assert p.a[1] == 20
     assert p.a[2] == 30
     assert p.a[3] == 0
+    #
+    # struct of struct of varsized array
+    BStruct2 = new_struct_type("bar")
+    complete_struct_or_union(BStruct2, [('head', BInt),
+                                        ('tail', BStruct)])
+    for i in range(2):   # try to detect heap overwrites
+        p = newp(new_pointer_type(BStruct2), [100, [200, list(range(50))]])
+        assert p.tail.y[49] == 49
+
 
 def test_struct_array_no_length_explicit_position():
     BInt = new_primitive_type("int")
@@ -3526,8 +3650,10 @@
     p[2:5] = [b"*", b"Z", b"T"]
     p[1:3] = b"XY"
     assert list(p) == [b"f", b"X", b"Y", b"Z", b"T", b"r", b"\x00"]
-    py.test.raises(TypeError, "p[1:5] = u+'XYZT'")
-    py.test.raises(TypeError, "p[1:5] = [1, 2, 3, 4]")
+    with pytest.raises(TypeError):
+        p[1:5] = u+'XYZT'
+    with pytest.raises(TypeError):
+        p[1:5] = [1, 2, 3, 4]
     #
     for typename in ["wchar_t", "char16_t", "char32_t"]:
         BUniChar = new_primitive_type(typename)
@@ -3536,8 +3662,10 @@
         p[2:5] = [u+"*", u+"Z", u+"T"]
         p[1:3] = u+"XY"
         assert list(p) == [u+"f", u+"X", u+"Y", u+"Z", u+"T", u+"r", u+"\x00"]
-        py.test.raises(TypeError, "p[1:5] = b'XYZT'")
-        py.test.raises(TypeError, "p[1:5] = [1, 2, 3, 4]")
+        with pytest.raises(TypeError):
+            p[1:5] = b'XYZT'
+        with pytest.raises(TypeError):
+            p[1:5] = [1, 2, 3, 4]
 
 def test_void_p_arithmetic():
     BVoid = new_void_type()
@@ -3548,10 +3676,14 @@
     assert int(cast(BInt, p - (-42))) == 100042
     assert (p + 42) - p == 42
     q = cast(new_pointer_type(new_primitive_type("char")), 100000)
-    py.test.raises(TypeError, "p - q")
-    py.test.raises(TypeError, "q - p")
-    py.test.raises(TypeError, "p + cast(new_primitive_type('int'), 42)")
-    py.test.raises(TypeError, "p - cast(new_primitive_type('int'), 42)")
+    with pytest.raises(TypeError):
+        p - q
+    with pytest.raises(TypeError):
+        q - p
+    with pytest.raises(TypeError):
+        p + cast(new_primitive_type('int'), 42)
+    with pytest.raises(TypeError):
+        p - cast(new_primitive_type('int'), 42)
 
 def test_sizeof_sliced_array():
     BInt = new_primitive_type("int")
@@ -3758,7 +3890,9 @@
     BIntP = new_pointer_type(BInt)
     BIntA = new_array_type(BIntP, None)
     lst = [-12345678, 87654321, 489148]
-    bytestring = buffer(newp(BIntA, lst))[:] + b'XYZ'
+    bytestring = bytearray(buffer(newp(BIntA, lst))[:] + b'XYZ')
+    lst2 = lst + [42, -999999999]
+    bytestring2 = bytearray(buffer(newp(BIntA, lst2))[:] + b'XYZ')
     #
     p1 = from_buffer(BIntA, bytestring)      # int[]
     assert typeof(p1) is BIntA
@@ -3766,11 +3900,25 @@
     assert p1[0] == lst[0]
     assert p1[1] == lst[1]
     assert p1[2] == lst[2]
-    py.test.raises(IndexError, "p1[3]")
-    py.test.raises(IndexError, "p1[-1]")
+    with pytest.raises(IndexError):
+        p1[3]
+    with pytest.raises(IndexError):
+        p1[-1]
     #
     py.test.raises(TypeError, from_buffer, BInt, bytestring)
-    py.test.raises(TypeError, from_buffer, BIntP, bytestring)
+    #
+    p2 = from_buffer(BIntP, bytestring)      # int *
+    assert p2 == p1 or 'PY_DOT_PY' in globals()
+    # note: on py.py ^^^, bytearray buffers are not emulated well enough
+    assert typeof(p2) is BIntP
+    assert p2[0] == lst[0]
+    assert p2[1] == lst[1]
+    assert p2[2] == lst[2]
+    # hopefully does not crash, but doesn't raise an exception:
+    p2[3]
+    p2[-1]
+    # not enough data even for one, but this is not enforced:
+    from_buffer(BIntP, b"")
     #
     BIntA2 = new_array_type(BIntP, 2)
     p2 = from_buffer(BIntA2, bytestring)     # int[2]
@@ -3778,9 +3926,11 @@
     assert len(p2) == 2
     assert p2[0] == lst[0]
     assert p2[1] == lst[1]
-    py.test.raises(IndexError, "p2[2]")
-    py.test.raises(IndexError, "p2[-1]")
-    assert p2 == p1
+    with pytest.raises(IndexError):
+        p2[2]
+    with pytest.raises(IndexError):
+        p2[-1]
+    assert p2 == p1 or 'PY_DOT_PY' in globals()
     #
     BIntA4 = new_array_type(BIntP, 4)        # int[4]: too big
     py.test.raises(ValueError, from_buffer, BIntA4, bytestring)
@@ -3790,12 +3940,37 @@
                                        ('a2', BInt, -1)])
     BStructP = new_pointer_type(BStruct)
     BStructA = new_array_type(BStructP, None)
-    p1 = from_buffer(BStructA, bytestring)   # struct[]
-    assert len(p1) == 1
+    p1 = from_buffer(BStructA, bytestring2)   # struct[]
+    assert len(p1) == 2
     assert typeof(p1) is BStructA
-    assert p1[0].a1 == lst[0]
-    assert p1[0].a2 == lst[1]
-    py.test.raises(IndexError, "p1[1]")
+    assert p1[0].a1 == lst2[0]
+    assert p1[0].a2 == lst2[1]
+    assert p1[1].a1 == lst2[2]
+    assert p1[1].a2 == lst2[3]
+    with pytest.raises(IndexError):
+        p1[2]
+    with pytest.raises(IndexError):
+        p1[-1]
+    assert repr(p1) == "<cdata 'foo[]' buffer len 2 from 'bytearray' object>"
+    #
+    p2 = from_buffer(BStructP, bytestring2)    # 'struct *'
+    assert p2 == p1 or 'PY_DOT_PY' in globals()
+    assert typeof(p2) is BStructP
+    assert p2.a1 == lst2[0]
+    assert p2.a2 == lst2[1]
+    assert p2[0].a1 == lst2[0]
+    assert p2[0].a2 == lst2[1]
+    assert p2[1].a1 == lst2[2]
+    assert p2[1].a2 == lst2[3]
+    # does not crash:
+    p2[2]
+    p2[-1]
+    # not enough data even for one, but this is not enforced:
+    from_buffer(BStructP, b"")
+    from_buffer(BStructP, b"1234567")
+    #
+    release(p1)
+    assert repr(p1) == "<cdata 'foo[]' buffer RELEASED>"
     #
     BEmptyStruct = new_struct_type("empty")
     complete_struct_or_union(BEmptyStruct, [], Ellipsis, 0)
@@ -3809,7 +3984,51 @@
     p1 = from_buffer(BEmptyStructA5, bytestring)   # struct empty[5]
     assert typeof(p1) is BEmptyStructA5
     assert len(p1) == 5
-    assert cast(BIntP, p1) == from_buffer(BIntA, bytestring)
+    assert (cast(BIntP, p1) == from_buffer(BIntA, bytestring)
+            or 'PY_DOT_PY' in globals())
+    #
+    BVarStruct = new_struct_type("varfoo")
+    BVarStructP = new_pointer_type(BVarStruct)
+    complete_struct_or_union(BVarStruct, [('a1', BInt, -1),
+                                          ('va', BIntA, -1)])
+    with pytest.raises(TypeError):
+        from_buffer(BVarStruct, bytestring)
+    pv = from_buffer(BVarStructP, bytestring)    # varfoo *
+    assert pv.a1 == lst[0]
+    assert pv.va[0] == lst[1]
+    assert pv.va[1] == lst[2]
+    assert sizeof(pv[0]) == 1 * size_of_int()
+    with pytest.raises(TypeError):
+        len(pv.va)
+    # hopefully does not crash, but doesn't raise an exception:
+    pv.va[2]
+    pv.va[-1]
+    # not enough data even for one, but this is not enforced:
+    from_buffer(BVarStructP, b"")
+    assert repr(pv) == "<cdata 'varfoo *' buffer from 'bytearray' object>"
+    assert repr(pv[0]).startswith("<cdata 'varfoo &' ")
+    #
+    release(pv)
+    assert repr(pv) == "<cdata 'varfoo *' buffer RELEASED>"
+    assert repr(pv[0]).startswith("<cdata 'varfoo &' ")
+    #
+    pv = from_buffer(BVarStructP, bytestring)    # make a fresh one
+    with pytest.raises(ValueError):
+        release(pv[0])
+
+def test_issue483():
+    BInt = new_primitive_type("int")
+    BIntP = new_pointer_type(BInt)
+    BIntA = new_array_type(BIntP, None)
+    lst = list(range(25))
+    bytestring = bytearray(buffer(newp(BIntA, lst))[:] + b'XYZ')
+    p1 = from_buffer(BIntA, bytestring)      # int[]
+    assert len(buffer(p1)) == 25 * size_of_int()
+    assert sizeof(p1) == 25 * size_of_int()
+    #
+    p2 = from_buffer(BIntP, bytestring)
+    assert sizeof(p2) == size_of_ptr()
+    assert len(buffer(p2)) == size_of_int()  # first element only, by default
 
 def test_memmove():
     Short = new_primitive_type("short")
@@ -3887,10 +4106,14 @@
     BInt = new_primitive_type("int")
     BIntPtr = new_pointer_type(BInt)
     p = cast(BIntPtr, 0)
-    py.test.raises(RuntimeError, "p[0]")
-    py.test.raises(RuntimeError, "p[0] = 42")
-    py.test.raises(RuntimeError, "p[42]")
-    py.test.raises(RuntimeError, "p[42] = -1")
+    with pytest.raises(RuntimeError):
+        p[0]
+    with pytest.raises(RuntimeError):
+        p[0] = 42
+    with pytest.raises(RuntimeError):
+        p[42]
+    with pytest.raises(RuntimeError):
+        p[42] = -1
 
 def test_mixup():
     BStruct1 = new_struct_type("foo")
@@ -3906,10 +4129,12 @@
     pp2 = newp(BStruct2PtrPtr)
     pp3 = newp(BStruct3PtrPtr)
     pp1[0] = pp1[0]
-    e = py.test.raises(TypeError, "pp3[0] = pp1[0]")
+    with pytest.raises(TypeError) as e:
+        pp3[0] = pp1[0]
     assert str(e.value).startswith("initializer for ctype 'bar *' must be a ")
     assert str(e.value).endswith(", not cdata 'foo *'")
-    e = py.test.raises(TypeError, "pp2[0] = pp1[0]")
+    with pytest.raises(TypeError) as e:
+        pp2[0] = pp1[0]
     assert str(e.value) == ("initializer for ctype 'foo *' appears indeed to "
                             "be 'foo *', but the types are different (check "
                             "that you are not e.g. mixing up different ffi "
@@ -4098,14 +4323,14 @@
         assert (a != b) is True
         assert (b != a) is True
         if strict_compare:
-            py.test.raises(TypeError, "a < b")
-            py.test.raises(TypeError, "a <= b")
-            py.test.raises(TypeError, "a > b")
-            py.test.raises(TypeError, "a >= b")
-            py.test.raises(TypeError, "b < a")
-            py.test.raises(TypeError, "b <= a")
-            py.test.raises(TypeError, "b > a")
-            py.test.raises(TypeError, "b >= a")
+            with pytest.raises(TypeError): a < b
+            with pytest.raises(TypeError): a <= b
+            with pytest.raises(TypeError): a > b
+            with pytest.raises(TypeError): a >= b
+            with pytest.raises(TypeError): b < a
+            with pytest.raises(TypeError): b <= a
+            with pytest.raises(TypeError): b > a
+            with pytest.raises(TypeError): b >= a
         elif a < b:
             assert_lt(a, b)
         else:
@@ -4151,7 +4376,8 @@
     BIntP = new_pointer_type(new_primitive_type("int"))
     p = newp(BIntP)
     p[0] = 42
-    py.test.raises(IndexError, "p[1]")
+    with pytest.raises(IndexError):
+        p[1]
     release(p)
     # here, reading p[0] might give garbage or segfault...
     release(p)   # no effect
@@ -4187,8 +4413,12 @@
 def test_explicit_release_badtype_contextmgr():
     BIntP = new_pointer_type(new_primitive_type("int"))
     p = cast(BIntP, 12345)
-    py.test.raises(ValueError, "with p: pass")
-    py.test.raises(ValueError, "with p: pass")
+    with pytest.raises(ValueError):
+        with p:
+            pass
+    with pytest.raises(ValueError):
+        with p:
+            pass
 
 def test_explicit_release_gc():
     BIntP = new_pointer_type(new_primitive_type("int"))
@@ -4224,8 +4454,10 @@
     BCharA = new_array_type(BCharP, None)
     p = from_buffer(BCharA, a)
     assert p[2] == b"z"
+    assert repr(p) == "<cdata 'char[]' buffer len 3 from 'bytearray' object>"
     release(p)
     assert p[2] == b"z"  # true so far, but might change to raise RuntimeError
+    assert repr(p) == "<cdata 'char[]' buffer RELEASED>"
     release(p)   # no effect
 
 def test_explicit_release_from_buffer_contextmgr():
@@ -4237,6 +4469,7 @@
     with p:
         assert p[2] == b"z"
     assert p[2] == b"z"  # true so far, but might change to raise RuntimeError
+    assert repr(p) == "<cdata 'char[]' buffer RELEASED>"
     release(p)   # no effect
 
 def test_explicit_release_bytearray_on_cpython():
@@ -4248,9 +4481,95 @@
     BCharA = new_array_type(BCharP, None)
     a += b't' * 10
     p = from_buffer(BCharA, a)
-    py.test.raises(BufferError, "a += b'u' * 100")
+    with pytest.raises(BufferError):
+        a += b'u' * 100
     release(p)
     a += b'v' * 100
     release(p)   # no effect
     a += b'w' * 1000
     assert a == bytearray(b"xyz" + b't' * 10 + b'v' * 100 + b'w' * 1000)
+
+def test_int_doesnt_give_bool():
+    BBool = new_primitive_type("_Bool")
+    x = int(cast(BBool, 42))
+    assert type(x) is int and x == 1
+    x = long(cast(BBool, 42))
+    assert type(x) is long and x == 1
+    with pytest.raises(TypeError):
+        float(cast(BBool, 42))
+    with pytest.raises(TypeError):
+        complex(cast(BBool, 42))
+
+def test_cannot_call_null_function_pointer():
+    BInt = new_primitive_type("int")
+    BFunc = new_function_type((BInt, BInt), BInt, False)
+    f = cast(BFunc, 0)
+    with pytest.raises(RuntimeError):
+        f(40, 2)
+
+def test_huge_structure():
+    BChar = new_primitive_type("char")
+    BArray = new_array_type(new_pointer_type(BChar), sys.maxsize)
+    assert sizeof(BArray) == sys.maxsize
+    BStruct = new_struct_type("struct foo")
+    complete_struct_or_union(BStruct, [('a1', BArray, -1)])
+    assert sizeof(BStruct) == sys.maxsize
+
+def test_get_types():
+    import _cffi_backend
+    CData, CType = _get_types()
+    assert CData is _cffi_backend._CDataBase
+    assert CType is _cffi_backend.CType
+
+def test_type_available_with_correct_names():
+    import _cffi_backend
+    check_names = [
+        'CType',
+        'CField',
+        'CLibrary',
+        '_CDataBase',
+        'FFI',
+        'Lib',
+        'buffer',
+    ]
+    if '__pypy__' in sys.builtin_module_names:
+        check_names += [
+            '__CData_iterator',
+            '__FFIGlobSupport',
+            '__FFIAllocator',
+            '__FFIFunctionWrapper',
+        ]
+    else:
+        check_names += [
+            '__CDataOwn',
+            '__CDataOwnGC',
+            '__CDataFromBuf',
+            '__CDataGCP',
+            '__CData_iterator',
+            '__FFIGlobSupport',
+        ]
+    for name in check_names:
+        tp = getattr(_cffi_backend, name)
+        assert isinstance(tp, type)
+        assert (tp.__module__, tp.__name__) == ('_cffi_backend', name)
+
+def test_unaligned_types():
+    BByteArray = new_array_type(
+        new_pointer_type(new_primitive_type("unsigned char")), None)
+    pbuf = newp(BByteArray, 40)
+    buf = buffer(pbuf)
+    #
+    for name in ['short', 'int', 'long', 'long long', 'float', 'double',
+                 'float _Complex', 'double _Complex']:
+        p = new_primitive_type(name)
+        if name.endswith(' _Complex'):
+            num = cast(p, 1.23 - 4.56j)
+        else:
+            num = cast(p, 0x0123456789abcdef)
+        size = sizeof(p)
+        buf[0:40] = b"\x00" * 40
+        pbuf1 = cast(new_pointer_type(p), pbuf + 1)
+        pbuf1[0] = num
+        assert pbuf1[0] == num
+        assert buf[0] == b'\x00'
+        assert buf[1 + size] == b'\x00'
diff --git a/cffi/__init__.py b/cffi/__init__.py
index 5ebb64b..82a9618 100644
--- a/cffi/__init__.py
+++ b/cffi/__init__.py
@@ -5,8 +5,8 @@
 from .error import CDefError, FFIError, VerificationError, VerificationMissing
 from .error import PkgConfigError
 
-__version__ = "1.12.2"
-__version_info__ = (1, 12, 2)
+__version__ = "1.15.0"
+__version_info__ = (1, 15, 0)
 
 # The verifier module file names are based on the CRC32 of a string that
 # contains the following version number.  It may be older than __version__
diff --git a/cffi/_cffi_errors.h b/cffi/_cffi_errors.h
index 83cdad0..158e059 100644
--- a/cffi/_cffi_errors.h
+++ b/cffi/_cffi_errors.h
@@ -54,6 +54,8 @@
         "      of.write(x)\n"
         "    except: pass\n"
         "    self.buf += x\n"
+        "  def flush(self):\n"
+        "    pass\n"
         "fl = FileLike()\n"
         "fl.buf = ''\n"
         "of = sys.stderr\n"
diff --git a/cffi/_cffi_include.h b/cffi/_cffi_include.h
index 37ea74f..e4c0a67 100644
--- a/cffi/_cffi_include.h
+++ b/cffi/_cffi_include.h
@@ -8,20 +8,49 @@
    the same works for the other two macros.  Py_DEBUG implies them,
    but not the other way around.
 
-   Issue #350 is still open: on Windows, the code here causes it to link
-   with PYTHON36.DLL (for example) instead of PYTHON3.DLL.  A fix was
-   attempted in 164e526a5515 and 14ce6985e1c3, but reverted: virtualenv
-   does not make PYTHON3.DLL available, and so the "correctly" compiled
-   version would not run inside a virtualenv.  We will re-apply the fix
-   after virtualenv has been fixed for some time.  For explanation, see
-   issue #355.  For a workaround if you want PYTHON3.DLL and don't worry
-   about virtualenv, see issue #350.  See also 'py_limited_api' in
-   setuptools_ext.py.
+   The implementation is messy (issue #350): on Windows, with _MSC_VER,
+   we have to define Py_LIMITED_API even before including pyconfig.h.
+   In that case, we guess what pyconfig.h will do to the macros above,
+   and check our guess after the #include.
+
+   Note that on Windows, with CPython 3.x, you need >= 3.5 and virtualenv
+   version >= 16.0.0.  With older versions of either, you don't get a
+   copy of PYTHON3.DLL in the virtualenv.  We can't check the version of
+   CPython *before* we even include pyconfig.h.  ffi.set_source() puts
+   a ``#define _CFFI_NO_LIMITED_API'' at the start of this file if it is
+   running on Windows with CPython 3.x < 3.5, as an attempt at fixing it, but that's
+   arguably wrong because it may not be the target version of Python.
+   Still better than nothing I guess.  As another workaround, you can
+   remove the definition of Py_LIMITED_API here.
+
+   See also 'py_limited_api' in cffi/setuptools_ext.py.
 */
 #if !defined(_CFFI_USE_EMBEDDING) && !defined(Py_LIMITED_API)
-#  include <pyconfig.h>
-#  if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG)
-#    define Py_LIMITED_API
+#  ifdef _MSC_VER
+#    if !defined(_DEBUG) && !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) && !defined(_CFFI_NO_LIMITED_API)
+#      define Py_LIMITED_API
+#    endif
+#    include <pyconfig.h>
+     /* sanity-check: Py_LIMITED_API will cause crashes if any of these
+        are also defined.  Normally, the Python file PC/pyconfig.h does not
+        cause any of these to be defined, with the exception that _DEBUG
+        causes Py_DEBUG.  Double-check that. */
+#    ifdef Py_LIMITED_API
+#      if defined(Py_DEBUG)
+#        error "pyconfig.h unexpectedly defines Py_DEBUG, but Py_LIMITED_API is set"
+#      endif
+#      if defined(Py_TRACE_REFS)
+#        error "pyconfig.h unexpectedly defines Py_TRACE_REFS, but Py_LIMITED_API is set"
+#      endif
+#      if defined(Py_REF_DEBUG)
+#        error "pyconfig.h unexpectedly defines Py_REF_DEBUG, but Py_LIMITED_API is set"
+#      endif
+#    endif
+#  else
+#    include <pyconfig.h>
+#    if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) && !defined(_CFFI_NO_LIMITED_API)
+#      define Py_LIMITED_API
+#    endif
 #  endif
 #endif
 
@@ -261,14 +290,62 @@
         return (int)_cffi_to_c_wchar3216_t(o);
 }
 
-_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(int x)
+_CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(unsigned int x)
 {
     if (sizeof(_cffi_wchar_t) == 4)
         return _cffi_from_c_wchar_t((_cffi_wchar_t)x);
     else
-        return _cffi_from_c_wchar3216_t(x);
+        return _cffi_from_c_wchar3216_t((int)x);
 }
 
+union _cffi_union_alignment_u {
+    unsigned char m_char;
+    unsigned short m_short;
+    unsigned int m_int;
+    unsigned long m_long;
+    unsigned long long m_longlong;
+    float m_float;
+    double m_double;
+    long double m_longdouble;
+};
+
+struct _cffi_freeme_s {
+    struct _cffi_freeme_s *next;
+    union _cffi_union_alignment_u alignment;
+};
+
+_CFFI_UNUSED_FN static int
+_cffi_convert_array_argument(struct _cffi_ctypedescr *ctptr, PyObject *arg,
+                             char **output_data, Py_ssize_t datasize,
+                             struct _cffi_freeme_s **freeme)
+{
+    char *p;
+    if (datasize < 0)
+        return -1;
+
+    p = *output_data;
+    if (p == NULL) {
+        struct _cffi_freeme_s *fp = (struct _cffi_freeme_s *)PyObject_Malloc(
+            offsetof(struct _cffi_freeme_s, alignment) + (size_t)datasize);
+        if (fp == NULL)
+            return -1;
+        fp->next = *freeme;
+        *freeme = fp;
+        p = *output_data = (char *)&fp->alignment;
+    }
+    memset((void *)p, 0, (size_t)datasize);
+    return _cffi_convert_array_from_object(p, ctptr, arg);
+}
+
+_CFFI_UNUSED_FN static void
+_cffi_free_array_arguments(struct _cffi_freeme_s *freeme)
+{
+    do {
+        void *p = (void *)freeme;
+        freeme = freeme->next;
+        PyObject_Free(p);
+    } while (freeme != NULL);
+}
 
 /**********  end CPython-specific section  **********/
 #else
diff --git a/cffi/_embedding.h b/cffi/_embedding.h
index 3953cd7..e863d85 100644
--- a/cffi/_embedding.h
+++ b/cffi/_embedding.h
@@ -145,6 +145,7 @@
     int result;
     PyGILState_STATE state;
     PyObject *pycode=NULL, *global_dict=NULL, *x;
+    PyObject *builtins;
 
     state = PyGILState_Ensure();
 
@@ -169,8 +170,10 @@
     global_dict = PyDict_New();
     if (global_dict == NULL)
         goto error;
-    if (PyDict_SetItemString(global_dict, "__builtins__",
-                             PyThreadState_GET()->interp->builtins) < 0)
+    builtins = PyEval_GetBuiltins();
+    if (builtins == NULL)
+        goto error;
+    if (PyDict_SetItemString(global_dict, "__builtins__", builtins) < 0)
         goto error;
     x = PyEval_EvalCode(
 #if PY_MAJOR_VERSION < 3
@@ -221,7 +224,7 @@
 
         if (f != NULL && f != Py_None) {
             PyFile_WriteString("\nFrom: " _CFFI_MODULE_NAME
-                               "\ncompiled with cffi version: 1.12.2"
+                               "\ncompiled with cffi version: 1.15.0"
                                "\n_cffi_backend module: ", f);
             modules = PyImport_GetModuleDict();
             mod = PyDict_GetItemString(modules, "_cffi_backend");
@@ -243,7 +246,9 @@
     goto done;
 }
 
+#if PY_VERSION_HEX < 0x03080000
 PyAPI_DATA(char *) _PyParser_TokenNames[];  /* from CPython */
+#endif
 
 static int _cffi_carefully_make_gil(void)
 {
@@ -263,23 +268,33 @@
        So we use a global variable as a simple spin lock.  This global
        variable must be from 'libpythonX.Y.so', not from this
        cffi-based extension module, because it must be shared from
-       different cffi-based extension modules.  We choose
+       different cffi-based extension modules.
+
+       In Python < 3.8, we choose
        _PyParser_TokenNames[0] as a completely arbitrary pointer value
        that is never written to.  The default is to point to the
        string "ENDMARKER".  We change it temporarily to point to the
        next character in that string.  (Yes, I know it's REALLY
        obscure.)
+
+       In Python >= 3.8, this string array is no longer writable, so
+       instead we pick PyCapsule_Type.tp_version_tag.  We can't change
+       Python < 3.8 because someone might use a mixture of cffi
+       embedded modules, some of which were compiled before this file
+       changed.
     */
 
 #ifdef WITH_THREAD
+# if PY_VERSION_HEX < 0x03080000
     char *volatile *lock = (char *volatile *)_PyParser_TokenNames;
-    char *old_value;
+    char *old_value, *locked_value;
 
     while (1) {    /* spin loop */
         old_value = *lock;
+        locked_value = old_value + 1;
         if (old_value[0] == 'E') {
             assert(old_value[1] == 'N');
-            if (cffi_compare_and_swap(lock, old_value, old_value + 1))
+            if (cffi_compare_and_swap(lock, old_value, locked_value))
                 break;
         }
         else {
@@ -290,23 +305,51 @@
                this is only run at start-up anyway. */
         }
     }
+# else
+    int volatile *lock = (int volatile *)&PyCapsule_Type.tp_version_tag;
+    int old_value, locked_value;
+    assert(!(PyCapsule_Type.tp_flags & Py_TPFLAGS_HAVE_VERSION_TAG));
+
+    while (1) {    /* spin loop */
+        old_value = *lock;
+        locked_value = -42;
+        if (old_value == 0) {
+            if (cffi_compare_and_swap(lock, old_value, locked_value))
+                break;
+        }
+        else {
+            assert(old_value == locked_value);
+            /* should ideally do a spin loop instruction here, but
+               hard to do it portably and doesn't really matter I
+               think: PyEval_InitThreads() should be very fast, and
+               this is only run at start-up anyway. */
+        }
+    }
+# endif
 #endif
 
     /* call Py_InitializeEx() */
-    {
-        PyGILState_STATE state = PyGILState_UNLOCKED;
-        if (!Py_IsInitialized())
-            _cffi_py_initialize();
-        else
-            state = PyGILState_Ensure();
-
+    if (!Py_IsInitialized()) {
+        _cffi_py_initialize();
+#if PY_VERSION_HEX < 0x03070000
+        PyEval_InitThreads();
+#endif
+        PyEval_SaveThread();  /* release the GIL */
+        /* the returned tstate must be the one that has been stored into the
+           autoTLSkey by _PyGILState_Init() called from Py_Initialize(). */
+    }
+    else {
+#if PY_VERSION_HEX < 0x03070000
+        /* PyEval_InitThreads() is always a no-op from CPython 3.7 */
+        PyGILState_STATE state = PyGILState_Ensure();
         PyEval_InitThreads();
         PyGILState_Release(state);
+#endif
     }
 
 #ifdef WITH_THREAD
     /* release the lock */
-    while (!cffi_compare_and_swap(lock, old_value + 1, old_value))
+    while (!cffi_compare_and_swap(lock, locked_value, old_value))
         ;
 #endif
 
@@ -325,11 +368,11 @@
 
 static struct _cffi_pypy_init_s {
     const char *name;
-    void (*func)(const void *[]);
+    void *func;    /* function pointer */
     const char *code;
 } _cffi_pypy_init = {
     _CFFI_MODULE_NAME,
-    (void(*)(const void *[]))_CFFI_PYTHON_STARTUP_FUNC,
+    _CFFI_PYTHON_STARTUP_FUNC,
     _CFFI_PYTHON_STARTUP_CODE,
 };
 
diff --git a/cffi/api.py b/cffi/api.py
index 32fe620..999a8ae 100644
--- a/cffi/api.py
+++ b/cffi/api.py
@@ -141,7 +141,11 @@
         linked to a particular library, just like C headers; in the
         library we only look for the actual (untyped) symbols.
         """
-        assert isinstance(name, basestring) or name is None
+        if not (isinstance(name, basestring) or
+                name is None or
+                isinstance(name, self.CData)):
+            raise TypeError("dlopen(name): name must be a file name, None, "
+                            "or an already-opened 'void *' handle")
         with self._lock:
             lib, function_cache = _make_ffi_library(self, name, flags)
             self._function_caches.append(function_cache)
@@ -799,9 +803,9 @@
 
 def _load_backend_lib(backend, name, flags):
     import os
-    if name is None:
-        if sys.platform != "win32":
-            return backend.load_library(None, flags)
+    if not isinstance(name, basestring):
+        if sys.platform != "win32" or name is not None:
+            return backend.load_library(name, flags)
         name = "c"    # Windows: load_library(None) fails, but this works
                       # on Python 2 (backward compatibility hack only)
     first_error = None
@@ -935,7 +939,7 @@
             backendlib.close_lib()
             self.__dict__.clear()
     #
-    if libname is not None:
+    if isinstance(libname, basestring):
         try:
             if not isinstance(libname, str):    # unicode, on Python 2
                 libname = libname.encode('utf-8')
diff --git a/cffi/backend_ctypes.py b/cffi/backend_ctypes.py
index 679ae05..e7956a7 100644
--- a/cffi/backend_ctypes.py
+++ b/cffi/backend_ctypes.py
@@ -403,7 +403,7 @@
                         source = _cast_source_to_int(source)
                     return cls(bool(source))
                 def __int__(self):
-                    return self._value
+                    return int(self._value)
 
             if kind == 'char':
                 @classmethod
diff --git a/cffi/cparser.py b/cffi/cparser.py
index df6303d..74830e9 100644
--- a/cffi/cparser.py
+++ b/cffi/cparser.py
@@ -29,6 +29,7 @@
 _r_define  = re.compile(r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)"
                         r"\b((?:[^\n\\]|\\.)*?)$",
                         re.DOTALL | re.MULTILINE)
+_r_line_directive = re.compile(r"^[ \t]*#[ \t]*(?:line|\d+)\b.*$", re.MULTILINE)
 _r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}")
 _r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$")
 _r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]")
@@ -145,17 +146,55 @@
     return ''.join(parts)
 
 def _warn_for_string_literal(csource):
-    if '"' in csource:
+    if '"' not in csource:
+        return
+    for line in csource.splitlines():
+        if '"' in line and not line.lstrip().startswith('#'):
+            import warnings
+            warnings.warn("String literal found in cdef() or type source. "
+                          "String literals are ignored here, but you should "
+                          "remove them anyway because some character sequences "
+                          "confuse pre-parsing.")
+            break
+
+def _warn_for_non_extern_non_static_global_variable(decl):
+    if not decl.storage:
         import warnings
-        warnings.warn("String literal found in cdef() or type source. "
-                      "String literals are ignored here, but you should "
-                      "remove them anyway because some character sequences "
-                      "confuse pre-parsing.")
+        warnings.warn("Global variable '%s' in cdef(): for consistency "
+                      "with C it should have a storage class specifier "
+                      "(usually 'extern')" % (decl.name,))
+
+def _remove_line_directives(csource):
+    # _r_line_directive matches whole lines, without the final \n, if they
+    # start with '#line' with some spacing allowed, or '#NUMBER'.  This
+    # function stores them away and replaces them with exactly the string
+    # '#line@N', where N is the index in the list 'line_directives'.
+    line_directives = []
+    def replace(m):
+        i = len(line_directives)
+        line_directives.append(m.group())
+        return '#line@%d' % i
+    csource = _r_line_directive.sub(replace, csource)
+    return csource, line_directives
+
+def _put_back_line_directives(csource, line_directives):
+    def replace(m):
+        s = m.group()
+        if not s.startswith('#line@'):
+            raise AssertionError("unexpected #line directive "
+                                 "(should have been processed and removed")
+        return line_directives[int(s[6:])]
+    return _r_line_directive.sub(replace, csource)
 
 def _preprocess(csource):
+    # First, remove the lines of the form '#line N "filename"' because
+    # the "filename" part could confuse the rest
+    csource, line_directives = _remove_line_directives(csource)
     # Remove comments.  NOTE: this only work because the cdef() section
-    # should not contain any string literal!
-    csource = _r_comment.sub(' ', csource)
+    # should not contain any string literals (except in line directives)!
+    def replace_keeping_newlines(m):
+        return ' ' + m.group().count('\n') * '\n'
+    csource = _r_comment.sub(replace_keeping_newlines, csource)
     # Remove the "#define FOO x" lines
     macros = {}
     for match in _r_define.finditer(csource):
@@ -208,7 +247,10 @@
     csource = _r_float_dotdotdot.sub(' __dotdotdotfloat__ ', csource)
     # Replace all remaining "..." with the same name, "__dotdotdot__",
     # which is declared with a typedef for the purpose of C parsing.
-    return csource.replace('...', ' __dotdotdot__ '), macros
+    csource = csource.replace('...', ' __dotdotdot__ ')
+    # Finally, put back the line directives
+    csource = _put_back_line_directives(csource, line_directives)
+    return csource, macros
 
 def _common_type_names(csource):
     # Look in the source for what looks like usages of types from the
@@ -384,7 +426,8 @@
                         realtype = self._get_unknown_ptr_type(decl)
                     else:
                         realtype, quals = self._get_type_and_quals(
-                            decl.type, name=decl.name, partial_length_ok=True)
+                            decl.type, name=decl.name, partial_length_ok=True,
+                            typedef_example="*(%s *)0" % (decl.name,))
                     self._declare('typedef ' + decl.name, realtype, quals=quals)
                 elif decl.__class__.__name__ == 'Pragma':
                     pass    # skip pragma, only in pycparser 2.15
@@ -502,6 +545,7 @@
                     if (quals & model.Q_CONST) and not tp.is_array_type:
                         self._declare('constant ' + decl.name, tp, quals=quals)
                     else:
+                        _warn_for_non_extern_non_static_global_variable(decl)
                         self._declare('variable ' + decl.name, tp, quals=quals)
 
     def parse_type(self, cdecl):
@@ -550,7 +594,8 @@
             return model.NamedPointerType(type, declname, quals)
         return model.PointerType(type, quals)
 
-    def _get_type_and_quals(self, typenode, name=None, partial_length_ok=False):
+    def _get_type_and_quals(self, typenode, name=None, partial_length_ok=False,
+                            typedef_example=None):
         # first, dereference typedefs, if we have it already parsed, we're good
         if (isinstance(typenode, pycparser.c_ast.TypeDecl) and
             isinstance(typenode.type, pycparser.c_ast.IdentifierType) and
@@ -567,8 +612,18 @@
             else:
                 length = self._parse_constant(
                     typenode.dim, partial_length_ok=partial_length_ok)
+            # a hack: in 'typedef int foo_t[...][...];', don't use '...' as
+            # the length but use directly the C expression that would be
+            # generated by recompiler.py.  This lets the typedef be used in
+            # many more places within recompiler.py
+            if typedef_example is not None:
+                if length == '...':
+                    length = '_cffi_array_len(%s)' % (typedef_example,)
+                typedef_example = "*" + typedef_example
+            #
             tp, quals = self._get_type_and_quals(typenode.type,
-                                partial_length_ok=partial_length_ok)
+                                partial_length_ok=partial_length_ok,
+                                typedef_example=typedef_example)
             return model.ArrayType(tp, length), quals
         #
         if isinstance(typenode, pycparser.c_ast.PtrDecl):
@@ -817,12 +872,20 @@
         # or positive/negative number
         if isinstance(exprnode, pycparser.c_ast.Constant):
             s = exprnode.value
-            if s.startswith('0'):
-                if s.startswith('0x') or s.startswith('0X'):
-                    return int(s, 16)
-                return int(s, 8)
-            elif '1' <= s[0] <= '9':
-                return int(s, 10)
+            if '0' <= s[0] <= '9':
+                s = s.rstrip('uUlL')
+                try:
+                    if s.startswith('0'):
+                        return int(s, 8)
+                    else:
+                        return int(s, 10)
+                except ValueError:
+                    if len(s) > 1:
+                        if s.lower()[0:2] == '0x':
+                            return int(s, 16)
+                        elif s.lower()[0:2] == '0b':
+                            return int(s, 2)
+                raise CDefError("invalid constant %r" % (s,))
             elif s[0] == "'" and s[-1] == "'" and (
                     len(s) == 3 or (len(s) == 4 and s[1] == "\\")):
                 return ord(s[-2])
@@ -850,19 +913,39 @@
                            "the actual array length in this context"
                            % exprnode.coord.line)
         #
-        if (isinstance(exprnode, pycparser.c_ast.BinaryOp) and
-                exprnode.op == '+'):
-            return (self._parse_constant(exprnode.left) +
-                    self._parse_constant(exprnode.right))
-        #
-        if (isinstance(exprnode, pycparser.c_ast.BinaryOp) and
-                exprnode.op == '-'):
-            return (self._parse_constant(exprnode.left) -
-                    self._parse_constant(exprnode.right))
+        if isinstance(exprnode, pycparser.c_ast.BinaryOp):
+            left = self._parse_constant(exprnode.left)
+            right = self._parse_constant(exprnode.right)
+            if exprnode.op == '+':
+                return left + right
+            elif exprnode.op == '-':
+                return left - right
+            elif exprnode.op == '*':
+                return left * right
+            elif exprnode.op == '/':
+                return self._c_div(left, right)
+            elif exprnode.op == '%':
+                return left - self._c_div(left, right) * right
+            elif exprnode.op == '<<':
+                return left << right
+            elif exprnode.op == '>>':
+                return left >> right
+            elif exprnode.op == '&':
+                return left & right
+            elif exprnode.op == '|':
+                return left | right
+            elif exprnode.op == '^':
+                return left ^ right
         #
         raise FFIError(":%d: unsupported expression: expected a "
                        "simple numeric constant" % exprnode.coord.line)
 
+    def _c_div(self, a, b):
+        result = a // b
+        if ((a < 0) ^ (b < 0)) and (a % b) != 0:
+            result += 1
+        return result
+
     def _build_enum_type(self, explicit_name, decls):
         if decls is not None:
             partial = False
diff --git a/cffi/model.py b/cffi/model.py
index 5f1b0d2..ad1c176 100644
--- a/cffi/model.py
+++ b/cffi/model.py
@@ -307,11 +307,14 @@
         self.c_name_with_marker = (
             self.item.c_name_with_marker.replace('&', brackets))
 
+    def length_is_unknown(self):    # True when the array length is a str placeholder such as '...'
+        return isinstance(self.length, str)    # any str counts as unknown, not only the literal '...'
+
     def resolve_length(self, newlength):
         return ArrayType(self.item, newlength)
 
     def build_backend_type(self, ffi, finishlist):
-        if self.length == '...':
+        if self.length_is_unknown():
             raise CDefError("cannot render the type %r: unknown length" %
                             (self,))
         self.item.get_cached_btype(ffi, finishlist)   # force the item BType
@@ -430,7 +433,7 @@
                 fsize = fieldsize[i]
                 ftype = self.fldtypes[i]
                 #
-                if isinstance(ftype, ArrayType) and ftype.length == '...':
+                if isinstance(ftype, ArrayType) and ftype.length_is_unknown():
                     # fix the length to match the total size
                     BItemType = ftype.item.get_cached_btype(ffi, finishlist)
                     nlen, nrest = divmod(fsize, ffi.sizeof(BItemType))
diff --git a/cffi/recompiler.py b/cffi/recompiler.py
index 20e912b..86b37d7 100644
--- a/cffi/recompiler.py
+++ b/cffi/recompiler.py
@@ -7,6 +7,9 @@
 VERSION_EMBEDDED = 0x2701
 VERSION_CHAR16CHAR32 = 0x2801
 
+USE_LIMITED_API = (sys.platform != 'win32' or sys.version_info < (3, 0) or
+                   sys.version_info >= (3, 5))
+
 
 class GlobalExpr:
     def __init__(self, name, address, type_op, size=0, check_value=0):
@@ -190,6 +193,17 @@
             assert isinstance(op, CffiOp)
         self.cffi_types = tuple(self.cffi_types)    # don't change any more
 
+    def _enum_fields(self, tp):
+        # When producing C, expand all anonymous struct/union fields.
+        # That's necessary to have C code checking the offsets of the
+        # individual fields contained in them.  When producing Python,
+        # don't do it and instead write it like it is, with the
+        # corresponding fields having an empty name.  Empty names are
+        # recognized at runtime when we import the generated Python
+        # file.
+        expand_anonymous_struct_union = not self.target_is_python
+        return tp.enumfields(expand_anonymous_struct_union)
+
     def _do_collect_type(self, tp):
         if not isinstance(tp, model.BaseTypeByIdentity):
             if isinstance(tp, tuple):
@@ -203,7 +217,7 @@
             elif isinstance(tp, model.StructOrUnion):
                 if tp.fldtypes is not None and (
                         tp not in self.ffi._parser._included_declarations):
-                    for name1, tp1, _, _ in tp.enumfields():
+                    for name1, tp1, _, _ in self._enum_fields(tp):
                         self._do_collect_type(self._field_type(tp, name1, tp1))
             else:
                 for _, x in tp._get_items():
@@ -283,6 +297,8 @@
         prnt = self._prnt
         if self.ffi._embedding is not None:
             prnt('#define _CFFI_USE_EMBEDDING')
+        if not USE_LIMITED_API:
+            prnt('#define _CFFI_NO_LIMITED_API')
         #
         # first the '#include' (actually done by inlining the file's content)
         lines = self._rel_readlines('_cffi_include.h')
@@ -560,23 +576,24 @@
             tovar, tp.get_c_name(''), errvalue))
         self._prnt('    %s;' % errcode)
 
-    def _extra_local_variables(self, tp, localvars):
+    def _extra_local_variables(self, tp, localvars, freelines):
         if isinstance(tp, model.PointerType):
             localvars.add('Py_ssize_t datasize')
+            localvars.add('struct _cffi_freeme_s *large_args_free = NULL')
+            freelines.add('if (large_args_free != NULL)'
+                          ' _cffi_free_array_arguments(large_args_free);')
 
     def _convert_funcarg_to_c_ptr_or_array(self, tp, fromvar, tovar, errcode):
         self._prnt('  datasize = _cffi_prepare_pointer_call_argument(')
         self._prnt('      _cffi_type(%d), %s, (char **)&%s);' % (
             self._gettypenum(tp), fromvar, tovar))
         self._prnt('  if (datasize != 0) {')
-        self._prnt('    if (datasize < 0)')
-        self._prnt('      %s;' % errcode)
-        self._prnt('    %s = (%s)alloca((size_t)datasize);' % (
+        self._prnt('    %s = ((size_t)datasize) <= 640 ? '
+                   '(%s)alloca((size_t)datasize) : NULL;' % (
             tovar, tp.get_c_name('')))
-        self._prnt('    memset((void *)%s, 0, (size_t)datasize);' % (tovar,))
-        self._prnt('    if (_cffi_convert_array_from_object('
-                   '(char *)%s, _cffi_type(%d), %s) < 0)' % (
-            tovar, self._gettypenum(tp), fromvar))
+        self._prnt('    if (_cffi_convert_array_argument(_cffi_type(%d), %s, '
+                   '(char **)&%s,' % (self._gettypenum(tp), fromvar, tovar))
+        self._prnt('            datasize, &large_args_free) < 0)')
         self._prnt('      %s;' % errcode)
         self._prnt('  }')
 
@@ -699,9 +716,10 @@
             prnt('  %s;' % arg)
         #
         localvars = set()
+        freelines = set()
         for type in tp.args:
-            self._extra_local_variables(type, localvars)
-        for decl in localvars:
+            self._extra_local_variables(type, localvars, freelines)
+        for decl in sorted(localvars):
             prnt('  %s;' % (decl,))
         #
         if not isinstance(tp.result, model.VoidType):
@@ -709,6 +727,7 @@
             context = 'result of %s' % name
             result_decl = '  %s;' % tp.result.get_c_name(' result', context)
             prnt(result_decl)
+            prnt('  PyObject *pyresult;')
         else:
             result_decl = None
             result_code = ''
@@ -742,9 +761,14 @@
         if numargs == 0:
             prnt('  (void)noarg; /* unused */')
         if result_code:
-            prnt('  return %s;' %
+            prnt('  pyresult = %s;' %
                  self._convert_expr_from_c(tp.result, 'result', 'result type'))
+            for freeline in freelines:
+                prnt('  ' + freeline)
+            prnt('  return pyresult;')
         else:
+            for freeline in freelines:
+                prnt('  ' + freeline)
             prnt('  Py_INCREF(Py_None);')
             prnt('  return Py_None;')
         prnt('}')
@@ -851,12 +875,13 @@
         prnt('{')
         prnt('  /* only to generate compile-time warnings or errors */')
         prnt('  (void)p;')
-        for fname, ftype, fbitsize, fqual in tp.enumfields():
+        for fname, ftype, fbitsize, fqual in self._enum_fields(tp):
             try:
                 if ftype.is_integer_type() or fbitsize >= 0:
                     # accept all integers, but complain on float or double
-                    prnt("  (void)((p->%s) | 0);  /* check that '%s.%s' is "
-                         "an integer */" % (fname, cname, fname))
+                    if fname != '':
+                        prnt("  (void)((p->%s) | 0);  /* check that '%s.%s' is "
+                             "an integer */" % (fname, cname, fname))
                     continue
                 # only accept exactly the type declared, except that '[]'
                 # is interpreted as a '*' and so will match any array length.
@@ -906,8 +931,7 @@
         flags = '|'.join(flags) or '0'
         c_fields = []
         if reason_for_not_expanding is None:
-            expand_anonymous_struct_union = not self.target_is_python
-            enumfields = list(tp.enumfields(expand_anonymous_struct_union))
+            enumfields = list(self._enum_fields(tp))
             for fldname, fldtype, fbitsize, fqual in enumfields:
                 fldtype = self._field_type(tp, fldname, fldtype)
                 self._check_not_opaque(fldtype,
@@ -1215,7 +1239,8 @@
             size_of_result = '(int)sizeof(%s)' % (
                 tp.result.get_c_name('', context),)
         prnt('static struct _cffi_externpy_s _cffi_externpy__%s =' % name)
-        prnt('  { "%s.%s", %s };' % (self.module_name, name, size_of_result))
+        prnt('  { "%s.%s", %s, 0, 0 };' % (
+            self.module_name, name, size_of_result))
         prnt()
         #
         arguments = []
@@ -1286,14 +1311,28 @@
     def _print_string_literal_in_array(self, s):
         prnt = self._prnt
         prnt('// # NB. this is not a string because of a size limit in MSVC')
+        if not isinstance(s, bytes):    # unicode
+            s = s.encode('utf-8')       # -> bytes
+        else:
+            s.decode('utf-8')           # got bytes, check for valid utf-8
+        try:
+            s.decode('ascii')
+        except UnicodeDecodeError:
+            s = b'# -*- encoding: utf8 -*-\n' + s
         for line in s.splitlines(True):
-            prnt(('// ' + line).rstrip())
+            comment = line
+            if type('//') is bytes:     # python2
+                line = map(ord, line)   #     make a list of integers
+            else:                       # python3
+                # type(line) is bytes, which enumerates like a list of integers
+                comment = ascii(comment)[1:-1]
+            prnt(('// ' + comment).rstrip())
             printed_line = ''
             for c in line:
                 if len(printed_line) >= 76:
                     prnt(printed_line)
                     printed_line = ''
-                printed_line += '%d,' % (ord(c),)
+                printed_line += '%d,' % (c,)
             prnt(printed_line)
 
     # ----------
diff --git a/cffi/setuptools_ext.py b/cffi/setuptools_ext.py
index df5a518..8fe3614 100644
--- a/cffi/setuptools_ext.py
+++ b/cffi/setuptools_ext.py
@@ -84,11 +84,13 @@
 
     On Windows, with CPython <= 3.4, it's better not to use py_limited_api
     because virtualenv *still* doesn't copy PYTHON3.DLL on these versions.
-    For now we'll skip py_limited_api on all Windows versions to avoid an
-    inconsistent mess.
+    Recently (2020) we started shipping only >= 3.5 wheels, though.  So
+    we'll give it another try and set py_limited_api on Windows >= 3.5.
     """
+    from cffi import recompiler
+
     if ('py_limited_api' not in kwds and not hasattr(sys, 'gettotalrefcount')
-            and sys.platform != 'win32'):
+            and recompiler.USE_LIMITED_API):
         import setuptools
         try:
             setuptools_major_version = int(setuptools.__version__.partition('.')[0])
diff --git a/cffi/vengine_cpy.py b/cffi/vengine_cpy.py
index 536f11f..6de0df0 100644
--- a/cffi/vengine_cpy.py
+++ b/cffi/vengine_cpy.py
@@ -275,22 +275,23 @@
             tovar, tp.get_c_name(''), errvalue))
         self._prnt('    %s;' % errcode)
 
-    def _extra_local_variables(self, tp, localvars):
+    def _extra_local_variables(self, tp, localvars, freelines):
         if isinstance(tp, model.PointerType):
             localvars.add('Py_ssize_t datasize')
+            localvars.add('struct _cffi_freeme_s *large_args_free = NULL')
+            freelines.add('if (large_args_free != NULL)'
+                          ' _cffi_free_array_arguments(large_args_free);')
 
     def _convert_funcarg_to_c_ptr_or_array(self, tp, fromvar, tovar, errcode):
         self._prnt('  datasize = _cffi_prepare_pointer_call_argument(')
         self._prnt('      _cffi_type(%d), %s, (char **)&%s);' % (
             self._gettypenum(tp), fromvar, tovar))
         self._prnt('  if (datasize != 0) {')
-        self._prnt('    if (datasize < 0)')
-        self._prnt('      %s;' % errcode)
-        self._prnt('    %s = alloca((size_t)datasize);' % (tovar,))
-        self._prnt('    memset((void *)%s, 0, (size_t)datasize);' % (tovar,))
-        self._prnt('    if (_cffi_convert_array_from_object('
-                   '(char *)%s, _cffi_type(%d), %s) < 0)' % (
-            tovar, self._gettypenum(tp), fromvar))
+        self._prnt('    %s = ((size_t)datasize) <= 640 ? '
+                   'alloca((size_t)datasize) : NULL;' % (tovar,))
+        self._prnt('    if (_cffi_convert_array_argument(_cffi_type(%d), %s, '
+                   '(char **)&%s,' % (self._gettypenum(tp), fromvar, tovar))
+        self._prnt('            datasize, &large_args_free) < 0)')
         self._prnt('      %s;' % errcode)
         self._prnt('  }')
 
@@ -369,15 +370,17 @@
             prnt('  %s;' % type.get_c_name(' x%d' % i, context))
         #
         localvars = set()
+        freelines = set()
         for type in tp.args:
-            self._extra_local_variables(type, localvars)
-        for decl in localvars:
+            self._extra_local_variables(type, localvars, freelines)
+        for decl in sorted(localvars):
             prnt('  %s;' % (decl,))
         #
         if not isinstance(tp.result, model.VoidType):
             result_code = 'result = '
             context = 'result of %s' % name
             prnt('  %s;' % tp.result.get_c_name(' result', context))
+            prnt('  PyObject *pyresult;')
         else:
             result_code = ''
         #
@@ -409,9 +412,14 @@
         if numargs == 0:
             prnt('  (void)noarg; /* unused */')
         if result_code:
-            prnt('  return %s;' %
+            prnt('  pyresult = %s;' %
                  self._convert_expr_from_c(tp.result, 'result', 'result type'))
+            for freeline in freelines:
+                prnt('  ' + freeline)
+            prnt('  return pyresult;')
         else:
+            for freeline in freelines:
+                prnt('  ' + freeline)
             prnt('  Py_INCREF(Py_None);')
             prnt('  return Py_None;')
         prnt('}')
@@ -754,7 +762,7 @@
         if isinstance(tp, model.ArrayType):
             tp_ptr = model.PointerType(tp.item)
             self._generate_cpy_const(False, name, tp, vartp=tp_ptr,
-                                     size_too = (tp.length == '...'))
+                                     size_too = tp.length_is_unknown())
         else:
             tp_ptr = model.PointerType(tp)
             self._generate_cpy_const(False, name, tp_ptr, category='var')
@@ -766,7 +774,7 @@
         value = getattr(library, name)
         if isinstance(tp, model.ArrayType):   # int a[5] is "constant" in the
                                               # sense that "a=..." is forbidden
-            if tp.length == '...':
+            if tp.length_is_unknown():
                 assert isinstance(value, tuple)
                 (value, size) = value
                 BItemType = self.ffi._get_cached_btype(tp.item)
@@ -981,6 +989,59 @@
     return PyBool_FromLong(was_alive);
 }
 
+union _cffi_union_alignment_u {
+    unsigned char m_char;
+    unsigned short m_short;
+    unsigned int m_int;
+    unsigned long m_long;
+    unsigned long long m_longlong;
+    float m_float;
+    double m_double;
+    long double m_longdouble;
+};
+
+struct _cffi_freeme_s {
+    struct _cffi_freeme_s *next;
+    union _cffi_union_alignment_u alignment;
+};
+
+#ifdef __GNUC__
+  __attribute__((unused))
+#endif
+static int _cffi_convert_array_argument(CTypeDescrObject *ctptr, PyObject *arg,
+                                        char **output_data, Py_ssize_t datasize,
+                                        struct _cffi_freeme_s **freeme)
+{
+    char *p;
+    if (datasize < 0)
+        return -1;
+
+    p = *output_data;
+    if (p == NULL) {
+        struct _cffi_freeme_s *fp = (struct _cffi_freeme_s *)PyObject_Malloc(
+            offsetof(struct _cffi_freeme_s, alignment) + (size_t)datasize);
+        if (fp == NULL)
+            return -1;
+        fp->next = *freeme;
+        *freeme = fp;
+        p = *output_data = (char *)&fp->alignment;
+    }
+    memset((void *)p, 0, (size_t)datasize);
+    return _cffi_convert_array_from_object(p, ctptr, arg);
+}
+
+#ifdef __GNUC__
+  __attribute__((unused))
+#endif
+static void _cffi_free_array_arguments(struct _cffi_freeme_s *freeme)
+{
+    do {
+        void *p = (void *)freeme;
+        freeme = freeme->next;
+        PyObject_Free(p);
+    } while (freeme != NULL);
+}
+
 static int _cffi_init(void)
 {
     PyObject *module, *c_api_object = NULL;
diff --git a/cffi/vengine_gen.py b/cffi/vengine_gen.py
index a64ff64..2642152 100644
--- a/cffi/vengine_gen.py
+++ b/cffi/vengine_gen.py
@@ -565,7 +565,7 @@
 
     def _generate_gen_variable_decl(self, tp, name):
         if isinstance(tp, model.ArrayType):
-            if tp.length == '...':
+            if tp.length_is_unknown():
                 prnt = self._prnt
                 funcname = '_cffi_sizeof_%s' % (name,)
                 self.export_symbols.append(funcname)
@@ -584,7 +584,7 @@
     def _loaded_gen_variable(self, tp, name, module, library):
         if isinstance(tp, model.ArrayType):   # int a[5] is "constant" in the
                                               # sense that "a=..." is forbidden
-            if tp.length == '...':
+            if tp.length_is_unknown():
                 funcname = '_cffi_sizeof_%s' % (name,)
                 BFunc = self.ffi._typeof_locked('size_t(*)(void)')[0]
                 function = module.load_function(BFunc, funcname)
diff --git a/cffi/verifier.py b/cffi/verifier.py
index 59b78c2..a500c78 100644
--- a/cffi/verifier.py
+++ b/cffi/verifier.py
@@ -50,7 +50,8 @@
             if tag:
                 raise TypeError("can't specify both 'modulename' and 'tag'")
         else:
-            key = '\x00'.join([sys.version[:3], __version_verifier_modules__,
+            key = '\x00'.join(['%d.%d' % sys.version_info[:2],
+                               __version_verifier_modules__,
                                preamble, flattened_kwds] +
                               ffi._cdefsources)
             if sys.version_info >= (3,):
diff --git a/doc/source/cdef.rst b/doc/source/cdef.rst
index f0bc6ba..0662668 100644
--- a/doc/source/cdef.rst
+++ b/doc/source/cdef.rst
@@ -235,7 +235,12 @@
 byte.  (Note that the packed attribute has no effect on bit fields so
 far, which mean that they may be packed differently than on GCC.
 Also, this has no effect on structs declared with ``"...;"``---more
-about it later in `Letting the C compiler fill the gaps`_.)
+about it later in `Letting the C compiler fill the gaps`_.  In
+particular, if your C source uses other attributes like
+``__attribute__((aligned(16)))``, there is no way to declare this fact
+in the ``cdef()``, but you can generally just declare the struct with
+``"...;"`` as the last field.)
+
 *New in version 1.12:*  In ABI mode, you can also pass ``pack=n``,
 with an integer ``n`` which must be a power of two.  Then the
 alignment of any field is limited to ``n`` if it would otherwise be
@@ -302,6 +307,7 @@
 
 
 .. _loading-libraries:
+.. _dlopen:
 
 ffi.dlopen(): loading libraries in ABI mode
 -------------------------------------------
@@ -364,6 +370,18 @@
 ``ffi.dlopen(None)`` no longer work on Windows; try instead
 ``ffi.dlopen(ctypes.util.find_library('c'))``.
 
+*New in version 1.14:* ``ffi.dlopen(handle)``: instead of a file path,
+you can give an already-opened library handle, as a cdata of type
+``void *``.  Such a call converts this handle into a regular FFI object
+with the functions and global variables declared by ``ffi.cdef()``.
+This is useful if you have special needs (e.g. you need the GNU extension
+``dlmopen()``, which you can declare and call yourself using a different
+``ffi`` object).  Note that in this variant, ``dlclose()`` is not called
+automatically if the FFI object is garbage-collected (but you can still
+call ``ffi.dlclose()`` explicitly if needed).
+
+
+.. _set_source:
 
 ffibuilder.set_source(): preparing out-of-line modules
 ------------------------------------------------------
@@ -471,10 +489,12 @@
 ``cdef()`` at various places, in order to ask the C compiler to fill
 in the details.  These places are:
 
-*  structure declarations: any ``struct { }`` that ends with "``...;``" as
-   the last "field" is
-   partial: it may be missing fields and/or have them declared out of order.
-   This declaration will be corrected by the compiler.  (But note that you
+*  structure declarations: any ``struct { }`` or ``union { }`` that ends
+   with "``...;``" as the last "field" is partial: it may be missing
+   fields, have them declared out of order, use non-standard alignment,
+   etc.  Precisely, the field offsets, total struct size, and total
+   struct alignment deduced by looking at the ``cdef`` are not relied
+   upon and will instead be corrected by the compiler.  (But note that you
    can only access fields that you declared, not others.)  Any ``struct``
    declaration which doesn't use "``...``" is assumed to be exact, but this is
    checked: you get an error if it is not correct.
@@ -512,14 +532,14 @@
    field; then you would use "``typedef struct { ...; } foo_t;``".
 
 *  array lengths: when used as structure fields or in global variables,
-   arrays can have an unspecified length, as in "``int n[...];``".  The
+   arrays can have an unspecified length, as in "``extern int n[...];``".  The
    length is completed by the C compiler.
-   This is slightly different from "``int n[];``", because the latter
+   This is slightly different from "``extern int n[];``", because the latter
    means that the length is not known even to the C compiler, and thus
    no attempt is made to complete it.  This supports
-   multidimensional arrays: "``int n[...][...];``".
+   multidimensional arrays: "``extern int n[...][...];``".
 
-   *New in version 1.2:* "``int m[][...];``", i.e. ``...`` can be used
+   *New in version 1.2:* "``extern int m[][...];``", i.e. ``...`` can be used
    in the innermost dimensions without being also used in the outermost
    dimension.  In the example given, the length of the ``m`` array is
    assumed not to be known to the C compiler, but the length of every
@@ -568,12 +588,12 @@
 
 For more complex types, you have no choice but be precise.  For example,
 you cannot misdeclare a ``int *`` argument as ``long *``, or a global
-array ``int a[5];`` as ``long a[5];``.  CFFI considers `all types listed
-above`_ as primitive (so ``long long a[5];`` and ``int64_t a[5]`` are
+array ``extern int a[5];`` as ``extern long a[5];``.  CFFI considers `all types listed
+above`_ as primitive (so ``extern long long a[5];`` and ``extern int64_t a[5];`` are
 different declarations).  The reason for that is detailed in `a comment
 about an issue.`__
 
-.. __: https://bitbucket.org/cffi/cffi/issues/265/cffi-doesnt-allow-creating-pointers-to#comment-28406958
+.. __: https://foss.heptapod.net/pypy/cffi/-/issues/265#note_50393
 
 
 ffibuilder.compile() etc.: compiling out-of-line modules
@@ -806,7 +826,7 @@
     print lib.mysize
 
 Extra arguments to ``ffi.verify()``:
-    
+
 *  ``tmpdir`` controls where the C
    files are created and compiled. Unless the ``CFFI_TMPDIR`` environment
    variable is set, the default is
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 3400cd1..33e8c11 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -45,9 +45,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '1.12'
+version = '1.15'
 # The full version, including alpha/beta/rc tags.
-release = '1.12.2'
+release = '1.15.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/doc/source/embedding.rst b/doc/source/embedding.rst
index 181249c..8020f21 100644
--- a/doc/source/embedding.rst
+++ b/doc/source/embedding.rst
@@ -380,7 +380,7 @@
   ``dlopen("libpythonX.Y.so", RTLD_LAZY|RTLD_GLOBAL)``, which will
   force ``libpythonX.Y.so`` to be loaded first.
 
-.. __: https://bitbucket.org/cffi/cffi/issues/264/
+.. __: https://foss.heptapod.net/pypy/cffi/-/issues/264
 
 
 Using multiple CFFI-made DLLs
@@ -522,3 +522,10 @@
 returns an integer, 0 or -1, to tell if the initialization succeeded
 or not.  Currently there is no way to prevent a failing initialization
 from also dumping a traceback and more information to stderr.
+Note that the function ``cffi_start_python()`` is static: it must be
+called from C source written inside ``ffibuilder.set_source()``.  To
+call it from somewhere else, you need to make a function (with a
+different non-static name) in the ``ffibuilder.set_source()`` that just
+calls ``cffi_start_python()``.  The reason it is static is to avoid
+naming conflicts in case you are ultimately trying to link a large C
+program with more than one cffi embedded module in it.
diff --git a/doc/source/goals.rst b/doc/source/goals.rst
index 0fda659..df4877c 100644
--- a/doc/source/goals.rst
+++ b/doc/source/goals.rst
@@ -55,8 +55,8 @@
 Comments and bugs
 -----------------
 
-The best way to contact us is on the IRC ``#pypy`` channel of
-``irc.freenode.net``.  Feel free to discuss matters either there or in
+The best way to contact us is on the IRC ``#cffi`` or ``#pypy`` channels of
+``irc.libera.chat``.  Feel free to discuss matters either there or in
 the `mailing list`_.  Please report to the `issue tracker`_ any bugs.
 
 As a general rule, when there is a design issue to resolve, we pick the
@@ -65,5 +65,5 @@
 
 --- the authors, Armin Rigo and Maciej Fijalkowski
 
-.. _`issue tracker`: https://bitbucket.org/cffi/cffi/issues
+.. _`issue tracker`: https://foss.heptapod.net/pypy/cffi/issues
 .. _`mailing list`: https://groups.google.com/forum/#!forum/python-cffi
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 1126318..54934f2 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -17,6 +17,3 @@
    ref
    cdef
    embedding
-
-
-
diff --git a/doc/source/installation.rst b/doc/source/installation.rst
index 4af17d5..6d55eb5 100644
--- a/doc/source/installation.rst
+++ b/doc/source/installation.rst
@@ -17,7 +17,7 @@
 libffi, so it depends on libffi being bug-free; this may not be fully
 the case on some of the more exotic platforms.)
 
-CFFI supports CPython 2.6, 2.7, 3.x (tested with 3.2 to 3.4); and is
+CFFI supports CPython 2.7, 3.x (tested with 3.6 to 3.9); and is
 distributed with PyPy (CFFI 1.0 is distributed with and requires
 PyPy 2.6).
 
@@ -31,7 +31,7 @@
 
 Requirements:
 
-* CPython 2.6 or 2.7 or 3.x, or PyPy (PyPy 2.0 for the earliest
+* CPython 2.7 or 3.x, or PyPy (PyPy 2.0 for the earliest
   versions of CFFI; or PyPy 2.6 for CFFI 1.0).
 
 * in some cases you need to be able to compile C extension modules.
@@ -52,26 +52,26 @@
 
 * https://pypi.python.org/pypi/cffi
 
-* Checksums of the "source" package version 1.12.2:
+* Checksums of the "source" package version 1.15.0:
 
-   - MD5: 4d7dcb6c7c738c15d2ece9bd4c5f86da
+   - MD5: f3a3f26cd3335fc597479c9475da0a0b
 
-   - SHA: 5f579d4980cbcc8aac592721f714ef6a64370ab1
+   - SHA1: 9c51c29e35510adf7f94542e1f8e05611930b07b
 
-   - SHA256: e113878a446c6228669144ae8a56e268c91b7f1fafae927adc4879d9849e0ea7
+   - SHA256: 920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954
 
-* Or grab the most current version from the `Bitbucket page`_:
-  ``hg clone https://bitbucket.org/cffi/cffi``
+* Or grab the most current version from the `Heptapod page`_:
+  ``hg clone https://foss.heptapod.net/pypy/cffi``
 
 * ``python setup.py install`` or ``python setup_base.py install``
   (should work out of the box on Linux or Windows; see below for
-  `MacOS X`_ or `Windows 64`_.)
+  `MacOS X`_.)
 
 * running the tests: ``py.test  c/  testing/`` (if you didn't
   install cffi yet, you need first ``python setup_base.py build_ext -f
   -i``)
 
-.. _`Bitbucket page`: https://bitbucket.org/cffi/cffi
+.. _`Heptapod page`: https://foss.heptapod.net/pypy/cffi
 
 Demos:
 
@@ -82,9 +82,9 @@
   ultimate reference is given by the tests, notably
   `testing/cffi1/test_verify1.py`_ and `testing/cffi0/backend_tests.py`_.
 
-.. _`demo`: https://bitbucket.org/cffi/cffi/src/default/demo
-.. _`testing/cffi1/test_verify1.py`: https://bitbucket.org/cffi/cffi/src/default/testing/cffi1/test_verify1.py
-.. _`testing/cffi0/backend_tests.py`: https://bitbucket.org/cffi/cffi/src/default/testing/cffi0/backend_tests.py
+.. _`demo`: https://foss.heptapod.net/pypy/cffi/-/tree/branch/default/demo
+.. _`testing/cffi1/test_verify1.py`: https://foss.heptapod.net/pypy/cffi/-/blob/branch/default/testing/cffi1/test_verify1.py
+.. _`testing/cffi0/backend_tests.py`: https://foss.heptapod.net/pypy/cffi/-/blob/branch/default/testing/cffi0/backend_tests.py
 
 
 Platform-specific instructions
@@ -133,39 +133,23 @@
 .. _here: http://superuser.com/questions/259278/python-2-6-1-pycrypto-2-3-pypi-package-broken-pipe-during-build
 
 
-Windows (regular 32-bit)
-++++++++++++++++++++++++
+Windows (32/64-bit)
++++++++++++++++++++
 
-Win32 works and is tested at least each official release.
+Win32 and Win64 work and are tested at least once for each official release.
 
 The recommended C compiler compatible with Python 2.7 is this one:
 http://www.microsoft.com/en-us/download/details.aspx?id=44266
-There is a known problem with distutils on Python 2.7, as 
-explained in https://bugs.python.org/issue23246, and the same 
+There is a known problem with distutils on Python 2.7, as
+explained in https://bugs.python.org/issue23246, and the same
 problem applies whenever you want to run compile() to build a dll with
-this specific compiler suite download. 
+this specific compiler suite download.
 ``import setuptools`` might help, but YMMV
 
 For Python 3.4 and beyond:
 https://www.visualstudio.com/en-us/downloads/visual-studio-2015-ctp-vs
 
 
-Windows 64
-++++++++++
-
-Win64 received very basic testing and we applied a few essential
-fixes in cffi 0.7. The comment above applies for Python 2.7 on 
-Windows 64 as well. Please report any other issue.
-
-Note as usual that this is only about running the 64-bit version of
-Python on the 64-bit OS.  If you're running the 32-bit version (the
-common case apparently), then you're running Win32 as far as we're
-concerned.
-
-.. _`issue 9`: https://bitbucket.org/cffi/cffi/issue/9
-.. _`Python issue 7546`: http://bugs.python.org/issue7546
-
-
 Linux and OS/X: UCS2 versus UCS4
 ++++++++++++++++++++++++++++++++
 
diff --git a/doc/source/overview.rst b/doc/source/overview.rst
index bcc5663..dbc3540 100644
--- a/doc/source/overview.rst
+++ b/doc/source/overview.rst
@@ -3,7 +3,7 @@
 =======================================================
 
 .. contents::
-   
+
 
 The first section presents a simple working
 example of using CFFI to call a C function in a compiled shared object
@@ -61,9 +61,9 @@
 
 Execute this script.  If everything is OK, it should produce
 ``_pi_cffi.c``, and then invoke the compiler on it.  The produced
-``_pi_cffi.c`` contains a copy of the string given in ``set_source()``,
+``_pi_cffi.c`` contains a copy of the string given in :ref:`set_source() <set_source>`,
 in this example the ``#include "pi.h"``. Afterwards, it contains glue code
-for all the functions, types and globals declared in the ``cdef()`` above.
+for all the functions, types and globals declared in the :ref:`cdef() <cdef>` above.
 
 At runtime, you use the extension module like this:
 
@@ -126,7 +126,7 @@
     >>> ffi = FFI()
     >>> ffi.cdef("""
     ...     int printf(const char *format, ...);   // copy-pasted from the man page
-    ... """)                                  
+    ... """)
     >>> C = ffi.dlopen(None)                     # loads the entire C namespace
     >>> arg = ffi.new("char[]", b"world")        # equivalent to C code: char arg[] = "world";
     >>> C.printf(b"hi there, %s.\n", arg)        # call printf
@@ -138,7 +138,7 @@
 ``str`` (or a ``unicode`` on Python 2) you need to encode it explicitly
 with ``somestring.encode(myencoding)``.
 
-*Python 3 on Windows:* ``ffi.dlopen(None)`` does not work.  This problem
+*Python 3 on Windows:* :ref:`ffi.dlopen(None) <dlopen>` does not work.  This problem
 is messy and not really fixable.  The problem does not occur if you try
 to call a function from a specific DLL that exists on your system: then
 you use ``ffi.dlopen("path.dll")``.
@@ -179,7 +179,7 @@
     f.close()
 
 This can be used as a more flexible replacement of the struct_ and
-array_ modules, and replaces ctypes_.  You could also call ``ffi.new("pixel_t[600][800]")``
+array_ modules, and replaces ctypes_.  You could also call :ref:`ffi.new("pixel_t[600][800]") <new>`
 and get a two-dimensional array.
 
 .. _struct: http://docs.python.org/library/struct.html
@@ -191,7 +191,7 @@
 This example also admits an out-of-line equivalent.  It is similar to
 the first example `Main mode of usage`_ above,
 but passing ``None`` as the second argument to
-``ffibuilder.set_source()``.  Then in the main program you write
+:ref:`ffibuilder.set_source() <set_source>`.  Then in the main program you write
 ``from _simple_example import ffi`` and then the same content as the
 in-line example above starting from the line ``image =
 ffi.new("pixel_t[]", 800*600)``.
@@ -244,7 +244,7 @@
 "source code" into the file ``_example.c`` and compile this to a
 regular C extension module.  (CFFI selects either Python or C for the
 module to generate based on whether the second argument to
-``set_source()`` is ``None`` or not.)
+:ref:`set_source() <set_source>` is ``None`` or not.)
 
 *You need a C compiler for this single step.  It produces a file called
 e.g. _example.so or _example.pyd.  If needed, it can be distributed in
@@ -265,7 +265,7 @@
 passwd`` (it is "API level", as opposed to "ABI level").  It requires
 a C compiler in order to run ``example_build.py``, but it is much more
 portable than trying to get the details of the fields of ``struct
-passwd`` exactly right.  Similarly, in the ``cdef()`` we declared
+passwd`` exactly right.  Similarly, in the :ref:`cdef() <cdef>` we declared
 ``getpwuid()`` as taking an ``int`` argument; on some platforms this
 might be slightly incorrect---but it does not matter.
 
@@ -301,28 +301,28 @@
       /* filename: pi.c*/
       # include <stdlib.h>
       # include <math.h>
-       
+
       /* Returns a very crude approximation of Pi
          given a int: a number of iteration */
       float pi_approx(int n){
-      
+
         double i,x,y,sum=0;
-      
+
         for(i=0;i<n;i++){
-      
+
           x=rand();
           y=rand();
-      
+
           if (sqrt(x*x+y*y) < sqrt((double)RAND_MAX*RAND_MAX))
             sum++; }
-      
+
         return 4*(float)sum/(float)n; }
 
    .. code-block:: C
 
       /* filename: pi.h*/
       float pi_approx(int n);
-      
+
 Create a script named ``pi_extension_build.py``, building
 the C extension:
 
@@ -330,21 +330,21 @@
 
       from cffi import FFI
       ffibuilder = FFI()
-      
+
       ffibuilder.cdef("float pi_approx(int n);")
-   
+
       ffibuilder.set_source("_pi",  # name of the output C extension
       """
-          #include "pi.h"',
+          #include "pi.h"
       """,
           sources=['pi.c'],   # includes pi.c as additional sources
           libraries=['m'])    # on Unix, link with the math library
-   
+
       if __name__ == "__main__":
           ffibuilder.compile(verbose=True)
 
 Build the extension:
-   
+
    .. code-block:: shell
 
       python pi_extension_build.py
@@ -354,14 +354,14 @@
 Linux for example).  It can be called from Python:
 
    .. code-block:: python
-   
+
        from _pi.lib import pi_approx
-   
+
        approx = pi_approx(10)
-       assert str(pi_approximation).startswith("3.")
-   
+       assert str(approx).startswith("3.")
+
        approx = pi_approx(10000)
-       assert str(approx).startswith("3.1")  
+       assert str(approx).startswith("3.1")
 
 
 .. _performance:
@@ -428,7 +428,7 @@
 This mixture mode lets you massively reduces the import times, because
 it is slow to parse a large C header.  It also allows you to do more
 detailed checkings during build-time without worrying about performance
-(e.g. calling ``cdef()`` many times with small pieces of declarations,
+(e.g. calling :ref:`cdef() <cdef>` many times with small pieces of declarations,
 based on the version of libraries detected on the system).
 
 .. code-block:: python
@@ -461,7 +461,7 @@
 
     lib.printf(b"hi there, number %d\n", ffi.cast("int", 2))
 
-Note that this ``ffi.dlopen()``, unlike the one from in-line mode,
+Note that this :ref:`ffi.dlopen() <dlopen>`, unlike the one from in-line mode,
 does not invoke any additional magic to locate the library: it must be
 a path name (with or without a directory), as required by the C
 ``dlopen()`` or ``LoadLibrary()`` functions.  This means that
@@ -496,7 +496,7 @@
 
 The "API level + in-line" mode combination exists but is long
 deprecated.  It used to be done with ``lib = ffi.verify("C header")``.
-The out-of-line variant with ``set_source("modname", "C header")`` is
+The out-of-line variant with :ref:`set_source("modname", "C header") <set_source>` is
 preferred and avoids a number of problems when the project grows in
 size.
 
@@ -558,13 +558,13 @@
 from the man pages.
 
 The declarations can contain **types, functions, constants**
-and **global variables.** What you pass to the ``cdef()`` must not
+and **global variables.** What you pass to the :ref:`cdef() <cdef>` must not
 contain more than that; in particular, ``#ifdef`` or ``#include``
 directives are not supported.  The cdef in the above examples are just
 that - they declared "there is a function in the C level with this
 given signature", or "there is a struct type with this shape".
 
-In the ABI examples, the ``dlopen()`` calls load libraries manually.
+In the ABI examples, the :ref:`dlopen() <dlopen>` calls load libraries manually.
 At the binary level, a program is split into multiple namespaces---a
 global one (on some platforms), plus one namespace per library.  So
 ``dlopen()`` returns a ``<FFILibrary>`` object, and this object has
@@ -576,13 +576,13 @@
 By opposition, the API mode works more closely like a C program: the C
 linker (static or dynamic) is responsible for finding any symbol used.
 You name the libraries in the ``libraries`` keyword argument to
-``set_source()``, but never need to say which symbol comes
+:ref:`set_source() <set_source>`, but never need to say which symbol comes
 from which library.
 Other common arguments to ``set_source()`` include ``library_dirs`` and
 ``include_dirs``; all these arguments are passed to the standard
 distutils/setuptools.
 
-The ``ffi.new()`` lines allocate C objects.  They are filled
+The :ref:`ffi.new() <new>` lines allocate C objects.  They are filled
 with zeroes initially, unless the optional second argument is used.
 If specified, this argument gives an "initializer", like you can use
 with C code to initialize global variables.
@@ -609,10 +609,10 @@
 libraries are typically meant to be used with a C compiler.* You are not
 supposed to do things like guess where fields are in the structures.
 The "real example" above shows how CFFI uses a C compiler under the
-hood: this example uses ``set_source(..., "C source...")`` and never
-``dlopen()``.  When using this approach,
+hood: this example uses :ref:`set_source(..., "C source...") <set_source>` and never
+:ref:`dlopen() <dlopen>`.  When using this approach,
 we have the advantage that we can use literally "``...``" at various places in
-the ``cdef()``, and the missing information will be completed with the
+the :ref:`cdef() <cdef>`, and the missing information will be completed with the
 help of the C compiler.  CFFI will turn this into a single C source file,
 which contains the "C source" part unmodified, followed by some
 "magic" C code and declarations derived from the ``cdef()``.  When
diff --git a/doc/source/ref.rst b/doc/source/ref.rst
index 3dc8e4b..05c0f7c 100644
--- a/doc/source/ref.rst
+++ b/doc/source/ref.rst
@@ -30,6 +30,8 @@
 confuse it with ``ffi.errno``.)
 
 
+.. _new:
+
 ffi.new()
 +++++++++
 
@@ -37,7 +39,7 @@
 allocate an instance according to the specified C type and return a
 pointer to it.  The specified C type must be either a pointer or an
 array: ``new('X *')`` allocates an X and returns a pointer to it,
-whereas ``new('X[n]')`` allocates an array of n X'es and returns an
+whereas ``new('X[10]')`` allocates an array of 10 X'es and returns an
 array referencing it (which works mostly like a pointer, like in C).
 You can also use ``new('X[]', n)`` to allocate an array of a
 non-constant length n.  See the `detailed documentation`__ for other
@@ -51,9 +53,13 @@
 data can be used as long as this object is kept alive, but must not be
 used for a longer time.  Be careful about that when copying the
 pointer to the memory somewhere else, e.g. into another structure.
-Also, this means that a line like ``x = ffi.new(...)[0]`` is *always
-wrong:* the newly allocated object goes out of scope instantly, and so
-is freed immediately, and ``x`` is garbage.
+Also, this means that a line like ``x = ffi.cast("B *", ffi.new("A *"))``
+or ``x = ffi.new("struct s[1]")[0]`` is wrong: the newly allocated object
+goes out of scope instantly, and so is freed immediately, and ``x`` is
+garbage.  The only case where this is fine comes from a special case for
+pointers-to-struct and pointers-to-union types: after
+``p = ffi.new("struct-or-union *", ..)``, then either ``p`` or ``p[0]``
+keeps the memory alive.
 
 The returned memory is initially cleared (filled with zeroes), before
 the optional initializer is applied.  For performance, see
@@ -231,7 +237,8 @@
 *New in version 1.12:* added the optional *first* argument ``cdecl``, and
 the keyword argument ``require_writable``:
 
-* ``cdecl`` defaults to ``"char[]"``, but a different array type can be
+* ``cdecl`` defaults to ``"char[]"``, but a different array
+  or (from version 1.13) pointer type can be
   specified for the result.  A value like ``"int[]"`` will return an array of
   ints instead of chars, and its length will be set to the number of ints
   that fit in the buffer (rounded down if the division is not exact).  Values
@@ -243,6 +250,12 @@
   keeps the underlying Python object alive and locked.  (In addition,
   ``ffi.from_buffer("int[]", x)`` gives better array bound checking.)
 
+  *New in version 1.13:* ``cdecl`` can be a pointer type.  If it points
+  to a struct or union, you can, as usual, write ``p.field`` instead of
+  ``p[0].field``.  You can also access ``p[n]``; note that CFFI does not
+  perform any bounds checking in this case.  Note also that ``p[0]`` cannot
+  be used to keep the buffer alive (unlike what occurs with ``ffi.new()``).
+
 * if ``require_writable`` is set to True, the function fails if the buffer
   obtained from ``python_buffer`` is read-only (e.g. if ``python_buffer`` is
   a byte string).  The exact exception is raised by the object itself, and
@@ -297,7 +310,7 @@
 e.g.:
 
 .. code-block:: python
-  
+
     def myfunction(ptr):
         assert ffi.typeof(ptr) is ffi.typeof("foo_t*")
         ...
@@ -448,7 +461,7 @@
 order, see the discussion in `issue 340`__.
 
 .. __: http://bugs.python.org/issue31105
-.. __: https://bitbucket.org/cffi/cffi/issues/340/resources-release-issues
+.. __: https://foss.heptapod.net/pypy/cffi/-/issues/340
 
 
 .. _ffi-new-handle:
@@ -608,6 +621,14 @@
     with my_new("int[]", n) as my_array:
         ...
 
+**Warning:** due to a bug, ``p = ffi.new_allocator(..)("struct-or-union *")``
+might not follow the rule that either ``p`` or ``p[0]`` keeps the memory
+alive, which holds for the normal ``ffi.new("struct-or-union *")`` allocator.
+It may sometimes be the case that if there is only a reference to ``p[0]``,
+the memory is freed.  The cause is that the rule doesn't hold for
+``ffi.gc()``, which is sometimes used in the implementation of
+``ffi.new_allocator()()``; this might be fixed in a future release.
+
 
 .. _ffi-release:
 
@@ -712,7 +733,7 @@
            raise IndexError("index too large!")
        ...
 
-.. __: https://bitbucket.org/cffi/cffi/issues/233/
+.. __: https://foss.heptapod.net/pypy/cffi/-/issues/233
 
 
 .. _ffi-getctype:
diff --git a/doc/source/using.rst b/doc/source/using.rst
index ff8e5f1..38c96ba 100644
--- a/doc/source/using.rst
+++ b/doc/source/using.rst
@@ -459,7 +459,7 @@
 if necessary with ``ffi.cast()``:
 
 .. code-block:: python
-  
+
     lib.printf("hello, %d\n", ffi.cast("int", 42))
     lib.printf("hello, %ld\n", ffi.cast("long", 42))
     lib.printf("hello, %f\n", ffi.cast("double", 42))
@@ -787,7 +787,7 @@
 may be implemented in the future.  (`This demo`__ shows how to do it
 anyway, but it is a bit lengthy.)
 
-.. __: https://bitbucket.org/cffi/cffi/src/default/demo/extern_python_varargs.py
+.. __: https://foss.heptapod.net/pypy/cffi/-/blob/branch/default/demo/extern_python_varargs.py
 
 Each corresponding Python callback function is defined with the
 ``@ffi.def_extern()`` decorator.  Be careful when writing this
@@ -876,11 +876,27 @@
       protections can interfere (for example, on SELinux you need to
       run with ``deny_execmem`` set to ``off``).
 
-    Note also that a cffi fix for the latter issue was attempted---see
+    - `On Mac OS X,`__ you need to give your application the entitlement
+      ``com.apple.security.cs.allow-unsigned-executable-memory``.
+
+    Note also that a cffi fix for this issue was attempted---see
     the ``ffi_closure_alloc`` branch---but was not merged because it
     creates potential `memory corruption`__ with ``fork()``.
 
+    In other words: yes, it is dangerous to allow write+execute memory in your
+    program; that's why the various "hardening" options above exist.  But at
+    the same time, these options open wide the door to another attack: if the
+    program forks and then attempts to call any of the ``ffi.callback()``, then
+    this immediately results in a crash---or, with a minimal amount of work
+    from an attacker, arbitrary code execution.  To me it sounds even more
+    dangerous than the original problem, and that's why cffi is not playing
+    along.
+
+    To fix the issue once and for all on the affected platforms, you need
+    to refactor the involved code so that it no longer uses ``ffi.callback()``.
+
 .. __: https://github.com/pyca/pyopenssl/issues/596
+.. __: https://foss.heptapod.net/pypy/cffi/-/issues/391
 .. __: https://bugzilla.redhat.com/show_bug.cgi?id=1249685
 
 Warning: like ffi.new(), ffi.callback() returns a cdata that has
@@ -950,7 +966,7 @@
     ffibuilder.compile(verbose=True)
 
 .. code-block:: python
-    
+
     # file "example.py"
 
     from _example import ffi, lib
diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst
index 18bb218..aa7f2fe 100644
--- a/doc/source/whatsnew.rst
+++ b/doc/source/whatsnew.rst
@@ -2,15 +2,191 @@
 What's New
 ======================
 
+v1.15.0
+=======
+
+* Fixed MANIFEST.in to include missing file for Windows arm64 support
+
+* Fixed Linux wheel build to use gcc default ISA for libffi
+
+* Updated setup.py Python trove specifiers to currently-tested Python versions
+
+* CPython 3.10 support (including wheels)
+
+* MacOS arm64 support (including wheels)
+
+* Initial Windows arm64 support
+
+* Misc. doc and test updates
+
+v1.14.6
+=======
+
+* Test fixes for CPython 3.10.0b3
+
+* Support for ``sys.unraisablehook()`` on Python >= 3.8
+
+* Fix two minor memory leaks (thanks Sebastian!)
+
+* Like many projects that had an IRC channel on freenode, we moved it to
+  ``irc.libera.chat``.
+
+v1.14.5
+=======
+
+* Source fix for old gcc versions
+
+* This and future releases should include wheels on more platforms,
+  thanks to our new release managers Matt and Matt!
+
+v1.14.4
+=======
+
+Release done for pip reasons.
+
+v1.14.3
+=======
+
+Release done for pip reasons.
+
+v1.14.2
+=======
+
+* CPython 3 on Windows: we again try to compile with ``Py_LIMITED_API``
+  by default.  This flag is not added if you run the compilation with
+  CPython 3.4, as it only works with CPython >= 3.5, but by now this
+  version of Python is quite old (and we no longer distribute cffi
+  wheels for it).
+
+  This may require that you upgrade ``virtualenv`` (requires version 16
+  or newer) or at least copy manually ``python3.dll`` into your existing
+  virtualenvs.  For distributing wheels with your cffi modules, you may
+  also need to upgrade ``wheel`` to the just-released version 0.35.
+
+  You can manually disable ``Py_LIMITED_API`` by calling
+  ``ffi.set_source(..., py_limited_api=False)``.
+
+
+v1.14.1
+=======
+
+* CFFI source code is now `hosted on Heptapod`_.
+
+* Improved support for ``typedef int my_array_t[...];`` with an explicit
+  dot-dot-dot in API mode (`issue #453`_)
+
+* Windows (32 and 64 bits): multiple fixes for ABI-mode call to functions
+  that return a structure.
+
+* Experimental support for MacOS 11 on aarch64.
+
+* and a few other minor changes and bug fixes.
+
+.. _`hosted on Heptapod`: https://foss.heptapod.net/pypy/cffi/
+.. _`issue #453`: https://foss.heptapod.net/pypy/cffi/-/issues/453
+
+
+v1.14
+=====
+
+* ``ffi.dlopen()`` can now be called with a handle (as a ``void *``) to an
+  already-opened C library.
+
+* CPython only: fixed a stack overflow issue for calls like
+  ``lib.myfunc([large list])``.  If the function is declared as taking a
+  ``float *`` argument, for example, then the array is temporarily converted
+  into a C array of floats---however, the code used to use ``alloca()`` for
+  this temporary storage, no matter how large.  This is now fixed.
+
+  The fix concerns all modes: in-line/out-of-line API/ABI.  Also note that your
+  API-mode C extension modules need to be regenerated with cffi 1.14 in order
+  to get the fix; i.e. for API mode, the fix is in the generated C sources.
+  (The C sources generated from cffi 1.14 should also work when running in
+  a different environment in which we have an older version of cffi.  Also,
+  this change makes no difference on PyPy.)
+
+  As a workaround that works on all versions of cffi, you can write
+  ``lib.myfunc(ffi.new("float[]", [large list]))``, which is
+  equivalent but explicitly builds the intermediate array as a regular
+  Python object on the heap.
+
+* fixed a memory leak inside ``ffi.getwinerror()`` on CPython 3.x.
+
+
+v1.13.2
+=======
+
+* re-release because the Linux wheels came with an attached version of libffi
+  that was very old and buggy (`issue #432`_).
+
+.. _`issue #432`: https://foss.heptapod.net/pypy/cffi/-/issues/432
+
+
+
+v1.13.1
+=======
+
+* deprecate the way to declare in ``cdef()`` a global variable with only
+  ``void *foo;``.  You should always use a storage class, like ``extern void
+  *foo;`` or maybe ``static void *foo;``.  These are all equivalent for
+  the purposes of ``cdef()``, but the reason for deprecating the bare version
+  is that (as far as I know) it would always be a mistake in a real C header.
+
+* fix the regression ``RuntimeError: found a situation in which we try
+  to build a type recursively`` (`issue #429`_).
+
+* fixed `issue #427`_ where a multithreading mistake in the embedding logic
+  initialization code would cause deadlocks on CPython 3.7.
+
+.. _`issue #429`: https://foss.heptapod.net/pypy/cffi/-/issues/429
+.. _`issue #427`: https://foss.heptapod.net/pypy/cffi/-/issues/427
+
+
+v1.13
+=====
+
+* ``ffi.from_buffer("type *", ..)`` is now supported, in addition to
+  ``"type[]"``.  You can then write ``p.field`` to access the items, instead
+  of only ``p[0].field``.  Be careful that no bounds checking is performed, so
+  ``p[n]`` might access data out of bounds.
+
+* fix for structs containing unnamed bitfields like ``int : 1;``.
+
+* when calling cdata of "function pointer" type, give a RuntimeError instead
+  of a crash if the pointer happens to be NULL
+
+* support some more binary operations between constants in enum definitions
+  (PR #96)
+
+* silence a warning incorrectly emitted if you use a quote in a preprocessor
+  line
+
+* detect a corner case that would throw the C code into an infinite
+  recursion, with ``ffi.cdef("""struct X { void(*fnptr)(struct X); };""")``
+
+
+Older Versions
+==============
+
+v1.12.3
+-------
+
+* Fix for nested struct types that end in a var-sized array (#405).
+
+* Add support for using ``U`` and ``L`` characters at the end of integer
+  constants in ``ffi.cdef()`` (thanks Guillaume).
+
+* More 3.8 fixes.
+
 
 v1.12.2
-=======
+-------
 
 * Added temporary workaround to compile on CPython 3.8.0a2.
 
 
 v1.12.1
-=======
+-------
 
 * CPython 3 on Windows: we again no longer compile with ``Py_LIMITED_API``
   by default because such modules *still* cannot be used with virtualenv.
@@ -21,12 +197,12 @@
   Like before, `Issue #350`_ mentions a workaround if you still want
   the ``Py_LIMITED_API`` flag and *either* you are not concerned about
   virtualenv *or* you are sure your module will not be used on CPython
-  <= 3.4: pass ``define_macros=[("Py_LIMITED_API", None)]`` to the
+  <= 3.4: pass ``define_macros=[("Py_LIMITED_API", None)]`` as a keyword to the
   ``ffibuilder.set_source()`` call.
 
 
 v1.12
-=====
+-----
 
 * `Direct support for pkg-config`__.
 
@@ -61,12 +237,9 @@
   to 1 byte instead of 4).
 
 .. __: cdef.html#pkgconfig
-.. _`issue #362`: https://bitbucket.org/cffi/cffi/issues/362/
+.. _`issue #362`: https://foss.heptapod.net/pypy/cffi/-/issues/362
 
 
-Older Versions
-==============
-
 v1.11.5
 -------
 
@@ -93,13 +266,13 @@
 * CPython 3 on Windows: we no longer compile with ``Py_LIMITED_API``
   by default because such modules cannot be used with virtualenv.
   `Issue #350`_ mentions a workaround if you still want that and are not
-  concerned about virtualenv: pass a ``define_macros=[("Py_LIMITED_API",
-  None)]`` to the ``ffibuilder.set_source()`` call.
+  concerned about virtualenv: pass ``define_macros=[("Py_LIMITED_API",
+  None)]`` as a keyword to the ``ffibuilder.set_source()`` call.
 
-.. _`Issue #345`: https://bitbucket.org/cffi/cffi/issues/345/
-.. _`Issue #350`: https://bitbucket.org/cffi/cffi/issues/350/
-.. _`Issue #358`: https://bitbucket.org/cffi/cffi/issues/358/
-.. _`Issue #357`: https://bitbucket.org/cffi/cffi/issues/357/
+.. _`Issue #345`: https://foss.heptapod.net/pypy/cffi/-/issues/345
+.. _`Issue #350`: https://foss.heptapod.net/pypy/cffi/-/issues/350
+.. _`Issue #358`: https://foss.heptapod.net/pypy/cffi/-/issues/358
+.. _`Issue #357`: https://foss.heptapod.net/pypy/cffi/-/issues/357
 
 
 v1.11.4
@@ -112,7 +285,7 @@
   ``foo.cp36-win32.pyd``, to make it clear that they are regular
   CPython modules depending on ``python36.dll``.
 
-.. _`Issue #355`: https://bitbucket.org/cffi/cffi/issues/355/
+.. _`Issue #355`: https://foss.heptapod.net/pypy/cffi/-/issues/355
 
 
 v1.11.3
@@ -201,9 +374,9 @@
   that are *slower* to call than the API mode does.  For some reason it
   is often thought to be faster.  It is not!
 
-.. __: https://bitbucket.org/cffi/cffi/issues/321/cffi-191-segmentation-fault-during-self
+.. __: https://foss.heptapod.net/pypy/cffi/-/issues/321
 .. __: ref.html#ffi-gc
-.. __: https://bitbucket.org/cffi/cffi/issues/320/improve-memory_pressure-management
+.. __: https://foss.heptapod.net/pypy/cffi/-/issues/320
 .. __: http://bugs.python.org/issue31105
 
 
diff --git a/setup.py b/setup.py
index f980590..5fd1a1c 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-import sys, os
+import sys, os, platform
 import subprocess
 import errno
 
@@ -56,7 +56,7 @@
     tries to compile C code.  (Hints: on OS/X 10.8, for errors about
     -mno-fused-madd see http://stackoverflow.com/questions/22313407/
     Otherwise, see https://wiki.python.org/moin/CompLangPython or
-    the IRC channel #python on irc.freenode.net.)
+    the IRC channel #python on irc.libera.chat.)
 
     Trying to continue anyway.  If you are trying to install CFFI from
     a build done in a different context, you can ignore this warning.
@@ -123,51 +123,39 @@
     os.environ['PKG_CONFIG_PATH'] = (
         os.environ.get('PKG_CONFIG_PATH', '') + ':' + pkgconfig)
 
-
-if sys.platform == 'win32' and uses_msvc():
-    COMPILE_LIBFFI = 'c/libffi_msvc'    # from the CPython distribution
-else:
-    COMPILE_LIBFFI = None
-
-if COMPILE_LIBFFI:
-    assert os.path.isdir(COMPILE_LIBFFI), "directory not found!"
-    include_dirs[:] = [COMPILE_LIBFFI]
-    libraries[:] = []
-    _filenames = [filename.lower() for filename in os.listdir(COMPILE_LIBFFI)]
-    _filenames = [filename for filename in _filenames
-                           if filename.endswith('.c')]
-    if sys.maxsize > 2**32:
-        # 64-bit: unlist win32.c, and add instead win64.obj.  If the obj
-        # happens to get outdated at some point in the future, you need to
-        # rebuild it manually from win64.asm.
-        _filenames.remove('win32.c')
-        extra_link_args.append(os.path.join(COMPILE_LIBFFI, 'win64.obj'))
-    sources.extend(os.path.join(COMPILE_LIBFFI, filename)
-                   for filename in _filenames)
+if sys.platform == "win32" and uses_msvc():
+    if platform.machine() == "ARM64":
+        include_dirs.append(os.path.join("c/libffi_arm64/include"))
+        library_dirs.append(os.path.join("c/libffi_arm64"))
+    else:
+        COMPILE_LIBFFI = 'c/libffi_x86_x64'    # from the CPython distribution
+        assert os.path.isdir(COMPILE_LIBFFI), "directory not found!"
+        include_dirs[:] = [COMPILE_LIBFFI]
+        libraries[:] = []
+        _filenames = [filename.lower() for filename in os.listdir(COMPILE_LIBFFI)]
+        _filenames = [filename for filename in _filenames
+                            if filename.endswith('.c')]
+        if sys.maxsize > 2**32:
+            # 64-bit: unlist win32.c, and add instead win64.obj.  If the obj
+            # happens to get outdated at some point in the future, you need to
+            # rebuild it manually from win64.asm.
+            _filenames.remove('win32.c')
+            extra_link_args.append(os.path.join(COMPILE_LIBFFI, 'win64.obj'))
+        sources.extend(os.path.join(COMPILE_LIBFFI, filename)
+                    for filename in _filenames)
 else:
     use_pkg_config()
     ask_supports_thread()
     ask_supports_sync_synchronize()
 
+if 'darwin' in sys.platform:
+    # priority is given to `pkg_config`, but always fall back on SDK's libffi.
+    extra_compile_args += ['-iwithsysroot/usr/include/ffi']
+
 if 'freebsd' in sys.platform:
     include_dirs.append('/usr/local/include')
     library_dirs.append('/usr/local/lib')
 
-if 'darwin' in sys.platform:
-    try:
-        p = subprocess.Popen(['xcrun', '--show-sdk-path'],
-                             stdout=subprocess.PIPE)
-    except OSError as e:
-        if e.errno not in [errno.ENOENT, errno.EACCES]:
-            raise
-    else:
-        t = p.stdout.read().decode().strip()
-        p.stdout.close()
-        if p.wait() == 0:
-            include_dirs.append(t + '/usr/include/ffi')
-
-
-
 if __name__ == '__main__':
     from setuptools import setup, Distribution, Extension
 
@@ -198,7 +186,7 @@
 
 `Mailing list <https://groups.google.com/forum/#!forum/python-cffi>`_
 """,
-        version='1.12.2',
+        version='1.15.0',
         packages=['cffi'] if cpython else [],
         package_data={'cffi': ['_cffi_include.h', 'parse_c_type.h', 
                                '_embedding.h', '_cffi_errors.h']}
@@ -236,15 +224,15 @@
         classifiers=[
             'Programming Language :: Python',
             'Programming Language :: Python :: 2',
-            'Programming Language :: Python :: 2.6',
             'Programming Language :: Python :: 2.7',
             'Programming Language :: Python :: 3',
-            'Programming Language :: Python :: 3.2',
-            'Programming Language :: Python :: 3.3',
-            'Programming Language :: Python :: 3.4',
-            'Programming Language :: Python :: 3.5',
             'Programming Language :: Python :: 3.6',
+            'Programming Language :: Python :: 3.7',
+            'Programming Language :: Python :: 3.8',
+            'Programming Language :: Python :: 3.9',
+            'Programming Language :: Python :: 3.10',
             'Programming Language :: Python :: Implementation :: CPython',
             'Programming Language :: Python :: Implementation :: PyPy',
+            'License :: OSI Approved :: MIT License',
         ],
     )
diff --git a/setup_base.py b/setup_base.py
index 667c7d5..4cf6ea5 100644
--- a/setup_base.py
+++ b/setup_base.py
@@ -8,6 +8,7 @@
 if __name__ == '__main__':
     from distutils.core import setup
     from distutils.extension import Extension
+
     standard = '__pypy__' not in sys.builtin_module_names
     setup(packages=['cffi'],
           requires=['pycparser'],
diff --git a/testing/cffi0/backend_tests.py b/testing/cffi0/backend_tests.py
index 13a4c78..ab013a1 100644
--- a/testing/cffi0/backend_tests.py
+++ b/testing/cffi0/backend_tests.py
@@ -1,6 +1,7 @@
 import py
+import pytest
 import platform
-import sys, ctypes
+import sys, ctypes, ctypes.util
 from cffi import FFI, CDefError, FFIError, VerificationMissing
 from testing.support import *
 
@@ -11,8 +12,8 @@
 SIZE_OF_WCHAR = ctypes.sizeof(ctypes.c_wchar)
 
 def needs_dlopen_none():
-    if sys.platform == 'win32' and sys.version_info >= (3,):
-        py.test.skip("dlopen(None) cannot work on Windows for Python 3")
+    if sys.platform == 'win32' and not ctypes.util.find_library('c'):
+        py.test.skip("dlopen(None) cannot work on Windows with this runtime")
 
 
 class BackendTests:
@@ -112,10 +113,14 @@
         p[9] = 43
         assert p[0] == 42
         assert p[9] == 43
-        py.test.raises(IndexError, "p[10]")
-        py.test.raises(IndexError, "p[10] = 44")
-        py.test.raises(IndexError, "p[-1]")
-        py.test.raises(IndexError, "p[-1] = 44")
+        with pytest.raises(IndexError):
+            p[10]
+        with pytest.raises(IndexError):
+            p[10] = 44
+        with pytest.raises(IndexError):
+            p[-1]
+        with pytest.raises(IndexError):
+            p[-1] = 44
 
     def test_new_array_args(self):
         ffi = FFI(backend=self.Backend())
@@ -140,18 +145,21 @@
         ffi = FFI(backend=self.Backend())
         p = ffi.new("int[]", 10)     # a single integer is the length
         assert p[9] == 0
-        py.test.raises(IndexError, "p[10]")
+        with pytest.raises(IndexError):
+            p[10]
         #
         py.test.raises(TypeError, ffi.new, "int[]")
         #
         p = ffi.new("int[]", [-6, -7])    # a list is all the items, like C
         assert p[0] == -6
         assert p[1] == -7
-        py.test.raises(IndexError, "p[2]")
+        with pytest.raises(IndexError):
+            p[2]
         assert repr(p) == "<cdata 'int[]' owning %d bytes>" % (2*SIZE_OF_INT)
         #
         p = ffi.new("int[]", 0)
-        py.test.raises(IndexError, "p[0]")
+        with pytest.raises(IndexError):
+            p[0]
         py.test.raises(ValueError, ffi.new, "int[]", -1)
         assert repr(p) == "<cdata 'int[]' owning 0 bytes>"
 
@@ -259,7 +267,8 @@
         p[2][3] = 33
         assert p[0][0] == 10
         assert p[2][3] == 33
-        py.test.raises(IndexError, "p[1][-1]")
+        with pytest.raises(IndexError):
+            p[1][-1]
 
     def test_constructor_array_of_array(self):
         ffi = FFI(backend=self.Backend())
@@ -386,7 +395,8 @@
         n = ffi.new("int*", 99)
         p = ffi.new("int*[]", [n])
         assert p[0][0] == 99
-        py.test.raises(TypeError, "p[0] = None")
+        with pytest.raises(TypeError):
+            p[0] = None
         p[0] = ffi.NULL
         assert p[0] == ffi.NULL
 
@@ -422,13 +432,15 @@
         assert s.a == s.b == s.c == 0
         s.b = -23
         assert s.b == -23
-        py.test.raises(OverflowError, "s.b = 32768")
+        with pytest.raises(OverflowError):
+            s.b = 32768
         #
         s = ffi.new("struct foo*", [-2, -3])
         assert s.a == -2
         assert s.b == -3
         assert s.c == 0
-        py.test.raises((AttributeError, TypeError), "del s.a")
+        with pytest.raises((AttributeError, TypeError)):
+            del s.a
         assert repr(s) == "<cdata 'struct foo *' owning %d bytes>" % (
             SIZE_OF_INT + 2 * SIZE_OF_SHORT)
         #
@@ -450,8 +462,10 @@
         assert s[0].a == s[0].b == s[0].c == 0
         s[0].b = -23
         assert s[0].b == s.b == -23
-        py.test.raises(OverflowError, "s[0].b = -32769")
-        py.test.raises(IndexError, "s[1]")
+        with pytest.raises(OverflowError):
+            s[0].b = -32769
+        with pytest.raises(IndexError):
+            s[1]
 
     def test_struct_opaque(self):
         ffi = FFI(backend=self.Backend())
@@ -511,11 +525,13 @@
         u.b = -23
         assert u.b == -23
         assert u.a != 0
-        py.test.raises(OverflowError, "u.b = 32768")
+        with pytest.raises(OverflowError):
+            u.b = 32768
         #
         u = ffi.new("union foo*", [-2])
         assert u.a == -2
-        py.test.raises((AttributeError, TypeError), "del u.a")
+        with pytest.raises((AttributeError, TypeError)):
+            del u.a
         assert repr(u) == "<cdata 'union foo *' owning %d bytes>" % SIZE_OF_INT
 
     def test_union_opaque(self):
@@ -591,7 +607,8 @@
         p[3] = b'\x00'
         assert ffi.string(p) == b"hel"
         assert ffi.string(p, 2) == b"he"
-        py.test.raises(IndexError, "p[7] = b'X'")
+        with pytest.raises(IndexError):
+            p[7] = b'X'
         #
         a = ffi.new("char[]", b"hello\x00world")
         assert len(a) == 12
@@ -615,7 +632,8 @@
         p[3] = u+'\x00'
         assert ffi.string(p) == u+"hel"
         assert ffi.string(p, 123) == u+"hel"
-        py.test.raises(IndexError, "p[7] = u+'X'")
+        with pytest.raises(IndexError):
+            p[7] = u+'X'
         #
         a = ffi.new("wchar_t[]", u+"hello\x00world")
         assert len(a) == 12
@@ -633,7 +651,8 @@
         s = ffi.new("struct foo*", [t])
         assert type(s.name) not in (bytes, str, unicode)
         assert ffi.string(s.name) == b"testing"
-        py.test.raises(TypeError, "s.name = None")
+        with pytest.raises(TypeError):
+            s.name = None
         s.name = ffi.NULL
         assert s.name == ffi.NULL
 
@@ -657,18 +676,21 @@
         a = ffi.new("int[]", [10, 11, 12])
         p = ffi.new("void **", a)
         vp = p[0]
-        py.test.raises(TypeError, "vp[0]")
+        with pytest.raises(TypeError):
+            vp[0]
         py.test.raises(TypeError, ffi.new, "short **", a)
         #
         ffi.cdef("struct foo { void *p; int *q; short *r; };")
         s = ffi.new("struct foo *")
         s.p = a    # works
         s.q = a    # works
-        py.test.raises(TypeError, "s.r = a")    # fails
+        with pytest.raises(TypeError):
+            s.r = a    # fails
         b = ffi.cast("int *", a)
         s.p = b    # works
         s.q = b    # works
-        py.test.raises(TypeError, "s.r = b")    # fails
+        with pytest.raises(TypeError):
+            s.r = b    # fails
 
     def test_functionptr_simple(self):
         ffi = FFI(backend=self.Backend())
@@ -687,7 +709,8 @@
         q = ffi.new("int(**)(int)", p)
         assert repr(q) == "<cdata 'int(* *)(int)' owning %d bytes>" % (
             SIZE_OF_PTR)
-        py.test.raises(TypeError, "q(43)")
+        with pytest.raises(TypeError):
+            q(43)
         res = q[0](43)
         assert res == 44
         q = ffi.cast("int(*)(int)", p)
@@ -912,10 +935,14 @@
         assert s.e == 4294967295
         assert s[0].e == 4294967295
         s.e = s.e
-        py.test.raises(TypeError, "s.e = 'B'")
-        py.test.raises(TypeError, "s.e = '2'")
-        py.test.raises(TypeError, "s.e = '#2'")
-        py.test.raises(TypeError, "s.e = '#7'")
+        with pytest.raises(TypeError):
+            s.e = 'B'
+        with pytest.raises(TypeError):
+            s.e = '2'
+        with pytest.raises(TypeError):
+            s.e = '#2'
+        with pytest.raises(TypeError):
+            s.e = '#7'
 
     def test_enum_non_contiguous(self):
         ffi = FFI(backend=self.Backend())
@@ -950,11 +977,14 @@
         ffi = FFI(backend=self.Backend())
         ffi.cdef("struct foo { int a, b; };")
         s = ffi.new("struct foo[1]")
-        py.test.raises(AttributeError, 's.b')
-        py.test.raises(AttributeError, 's.b = 412')
+        with pytest.raises(AttributeError):
+            s.b
+        with pytest.raises(AttributeError):
+            s.b = 412
         s[0].b = 412
         assert s[0].b == 412
-        py.test.raises(IndexError, 's[1]')
+        with pytest.raises(IndexError):
+            s[1]
 
     def test_pointer_to_array(self):
         ffi = FFI(backend=self.Backend())
@@ -1011,17 +1041,23 @@
         assert ffi.sizeof("struct foo") == 8
         s = ffi.new("struct foo *")
         s.a = 511
-        py.test.raises(OverflowError, "s.a = 512")
-        py.test.raises(OverflowError, "s[0].a = 512")
+        with pytest.raises(OverflowError):
+            s.a = 512
+        with pytest.raises(OverflowError):
+            s[0].a = 512
         assert s.a == 511
         s.a = -512
-        py.test.raises(OverflowError, "s.a = -513")
-        py.test.raises(OverflowError, "s[0].a = -513")
+        with pytest.raises(OverflowError):
+            s.a = -513
+        with pytest.raises(OverflowError):
+            s[0].a = -513
         assert s.a == -512
         s.c = 3
         assert s.c == 3
-        py.test.raises(OverflowError, "s.c = 4")
-        py.test.raises(OverflowError, "s[0].c = 4")
+        with pytest.raises(OverflowError):
+            s.c = 4
+        with pytest.raises(OverflowError):
+            s[0].c = 4
         s.c = -4
         assert s.c == -4
 
@@ -1205,7 +1241,7 @@
             py.test.skip(str(e))
         f.write(ffi.buffer(a, 1000 * ffi.sizeof("int")))
         f.seek(0)
-        assert f.read() == array.array('i', range(1000)).tostring()
+        assert f.read() == arraytostring(array.array('i', range(1000)))
         f.seek(0)
         b = ffi.new("int[]", 1005)
         f.readinto(ffi.buffer(b, 1000 * ffi.sizeof("int")))
@@ -1224,7 +1260,7 @@
             py.test.skip(str(e))
         f.write(ffi.buffer(a, 1000 * ffi.sizeof("int")))
         f.seek(0)
-        assert f.read() == array.array('i', range(1000)).tostring()
+        assert f.read() == arraytostring(array.array('i', range(1000)))
         f.seek(0)
         b = ffi.new("int[]", 1005)
         f.readinto(ffi.buffer(b, 1000 * ffi.sizeof("int")))
@@ -1279,7 +1315,8 @@
         p = ffi.new("struct foo_s *", 10)     # a single integer is the length
         assert p.len == 0
         assert p.data[9] == 0
-        py.test.raises(IndexError, "p.data[10]")
+        with pytest.raises(IndexError):
+            p.data[10]
 
     def test_ffi_typeof_getcname(self):
         ffi = FFI(backend=self.Backend())
diff --git a/testing/cffi0/test_ffi_backend.py b/testing/cffi0/test_ffi_backend.py
index 12ecaee..8e29bc4 100644
--- a/testing/cffi0/test_ffi_backend.py
+++ b/testing/cffi0/test_ffi_backend.py
@@ -129,6 +129,36 @@
         alloc5 = ffi.new_allocator(myalloc5)
         py.test.raises(MemoryError, alloc5, "int[5]")
 
+    def test_new_struct_containing_struct_containing_array_varsize(self):
+        ffi = FFI(backend=self.Backend())
+        ffi.cdef("""
+            struct foo_s { int len[100]; short data[]; };
+            struct bar_s { int abc[100]; struct foo_s tail; };
+        """)
+        # loop to try to detect heap overwrites, if the size allocated
+        # is too small
+        for i in range(1, 501, 100):
+            p = ffi.new("struct bar_s *", [[10], [[20], [3,4,5,6,7,8,9] * i]])
+            assert p.abc[0] == 10
+            assert p.tail.len[0] == 20
+            assert p.tail.data[0] == 3
+            assert p.tail.data[6] == 9
+            assert p.tail.data[7 * i - 1] == 9
+
+    def test_bogus_struct_containing_struct_containing_array_varsize(self):
+        ffi = FFI(backend=self.Backend())
+        ffi.cdef("""
+            struct foo_s { signed char len; signed char data[]; };
+            struct bar_s { struct foo_s foo; int bcd; };
+        """)
+        p = ffi.new("struct bar_s *", [[123, [45, 56, 67, 78]], 9999999])
+        assert p.foo.len == 123
+        assert p.foo.data[0] == 45
+        assert p.foo.data[1] == 56
+        assert p.foo.data[2] == 67
+        assert p.bcd == 9999999
+        assert p.foo.data[3] != 78   # has been overwritten with 9999999
+
 
 class TestBitfield:
     def check(self, source, expected_ofs_y, expected_align, expected_size):
@@ -149,6 +179,7 @@
         setters = ['case %d: s.%s = value; break;' % iname
                    for iname in enumerate(fnames)]
         lib = ffi1.verify("""
+            #include <string.h>
             struct s1 { %s };
             struct sa { char a; struct s1 b; };
             #define Gofs_y  offsetof(struct s1, y)
@@ -216,7 +247,10 @@
         self.check("int a:2; short b:15; char c:2; char y;", 5, 4, 8)
         self.check("int a:2; char b:1; char c:1; char y;", 1, 4, 4)
 
-    @pytest.mark.skipif("platform.machine().startswith(('arm', 'aarch64'))")
+    @pytest.mark.skipif(
+        "not (sys.platform == 'darwin' and platform.machine() == 'arm64')"
+        " and "
+        "platform.machine().startswith(('arm', 'aarch64'))")
     def test_bitfield_anonymous_no_align(self):
         L = FFI().alignof("long long")
         self.check("char y; int :1;", 0, 1, 2)
@@ -230,6 +264,8 @@
         self.check("char x; long long  :57; char y;", L + 8, 1, L + 9)
 
     @pytest.mark.skipif(
+        "(sys.platform == 'darwin' and platform.machine() == 'arm64')"
+        " or "
         "not platform.machine().startswith(('arm', 'aarch64'))")
     def test_bitfield_anonymous_align_arm(self):
         L = FFI().alignof("long long")
@@ -243,7 +279,10 @@
         self.check("char x; long long z:57; char y;", L + 8, L, L + 8 + L)
         self.check("char x; long long  :57; char y;", L + 8, L, L + 8 + L)
 
-    @pytest.mark.skipif("platform.machine().startswith(('arm', 'aarch64'))")
+    @pytest.mark.skipif(
+        "not (sys.platform == 'darwin' and platform.machine() == 'arm64')"
+        " and "
+        "platform.machine().startswith(('arm', 'aarch64'))")
     def test_bitfield_zero(self):
         L = FFI().alignof("long long")
         self.check("char y; int :0;", 0, 1, 4)
@@ -255,6 +294,8 @@
         self.check("int a:1; int :0; int b:1; char y;", 5, 4, 8)
 
     @pytest.mark.skipif(
+        "(sys.platform == 'darwin' and platform.machine() == 'arm64')"
+        " or "
         "not platform.machine().startswith(('arm', 'aarch64'))")
     def test_bitfield_zero_arm(self):
         L = FFI().alignof("long long")
@@ -268,12 +309,15 @@
 
     def test_error_cases(self):
         ffi = FFI()
-        py.test.raises(TypeError,
-            'ffi.cdef("struct s1 { float x:1; };"); ffi.new("struct s1 *")')
-        py.test.raises(TypeError,
-            'ffi.cdef("struct s2 { char x:0; };"); ffi.new("struct s2 *")')
-        py.test.raises(TypeError,
-            'ffi.cdef("struct s3 { char x:9; };"); ffi.new("struct s3 *")')
+        ffi.cdef("struct s1 { float x:1; };")
+        with pytest.raises(TypeError):
+            ffi.new("struct s1 *")
+        ffi.cdef("struct s2 { char x:0; };")
+        with pytest.raises(TypeError):
+            ffi.new("struct s2 *")
+        ffi.cdef("struct s3 { char x:9; };")
+        with pytest.raises(TypeError):
+            ffi.new("struct s3 *")
 
     def test_struct_with_typedef(self):
         ffi = FFI()
diff --git a/testing/cffi0/test_function.py b/testing/cffi0/test_function.py
index ca2353f..b4bb23d 100644
--- a/testing/cffi0/test_function.py
+++ b/testing/cffi0/test_function.py
@@ -1,10 +1,11 @@
 import py
+import pytest
 from cffi import FFI, CDefError
 import math, os, sys
 import ctypes.util
 from cffi.backend_ctypes import CTypesBackend
 from testing.udir import udir
-from testing.support import FdWriteCapture
+from testing.support import FdWriteCapture, StdErrCapture
 from .backend_tests import needs_dlopen_none
 
 try:
@@ -90,7 +91,8 @@
         """)
         m = ffi.dlopen(lib_m)
         assert m.FOOBAR == 42
-        py.test.raises(NotImplementedError, "m.baz")
+        with pytest.raises(NotImplementedError):
+            m.baz
 
     def test_tlsalloc(self):
         if sys.platform != 'win32':
@@ -111,7 +113,7 @@
         ffi = FFI(backend=self.Backend())
         ffi.cdef("""
             int fputs(const char *, void *);
-            void *stderr;
+            extern void *stderr;
         """)
         needs_dlopen_none()
         ffi.C = ffi.dlopen(None)
@@ -128,7 +130,7 @@
         ffi = FFI(backend=self.Backend())
         ffi.cdef("""
             int fputs(char *, void *);
-            void *stderr;
+            extern void *stderr;
         """)
         needs_dlopen_none()
         ffi.C = ffi.dlopen(None)
@@ -145,7 +147,7 @@
         ffi = FFI(backend=self.Backend())
         ffi.cdef("""
            int fprintf(void *, const char *format, ...);
-           void *stderr;
+           extern void *stderr;
         """)
         needs_dlopen_none()
         ffi.C = ffi.dlopen(None)
@@ -207,7 +209,7 @@
             py.test.skip("probably no symbol 'stderr' in the lib")
         ffi.cdef("""
             int fputs(const char *, void *);
-            void *stderr;
+            extern void *stderr;
         """)
         needs_dlopen_none()
         ffi.C = ffi.dlopen(None)
@@ -225,19 +227,32 @@
             def cb():
                 return returnvalue
             fptr = ffi.callback("void(*)(void)", cb)
-            old_stderr = sys.stderr
-            try:
-                sys.stderr = StringIO()
+            with StdErrCapture() as f:
                 returned = fptr()
-                printed = sys.stderr.getvalue()
-            finally:
-                sys.stderr = old_stderr
+            printed = f.getvalue()
             assert returned is None
             if returnvalue is None:
                 assert printed == ''
             else:
                 assert "None" in printed
 
+    def test_callback_returning_struct_three_bytes(self):
+        if self.Backend is CTypesBackend:
+            py.test.skip("not supported with the ctypes backend")
+        ffi = FFI(backend=self.Backend())
+        ffi.cdef("""
+            typedef struct {
+                unsigned char a, b, c;
+            } THREEBYTES;
+        """)
+        def cb():
+            return (12, 34, 56)
+        fptr = ffi.callback("THREEBYTES(*)(void)", cb)
+        tb = fptr()
+        assert tb.a == 12
+        assert tb.b == 34
+        assert tb.c == 56
+
     def test_passing_array(self):
         ffi = FFI(backend=self.Backend())
         ffi.cdef("""
@@ -254,7 +269,7 @@
             py.test.skip("probably no symbol 'stdout' in the lib")
         ffi = FFI(backend=self.Backend())
         ffi.cdef("""
-            void *stdout;
+            extern void *stdout;
         """)
         needs_dlopen_none()
         C = ffi.dlopen(None)
@@ -494,7 +509,7 @@
         ffi.cdef("""
             typedef enum { MYE1, MYE2 } myenum_t;
             double myfunc(double);
-            double myvar;
+            extern double myvar;
             const double myconst;
             #define MYFOO 42
         """)
@@ -505,7 +520,7 @@
         if self.Backend is CTypesBackend:
             py.test.skip("not with the ctypes backend")
         ffi = FFI(backend=self.Backend())
-        ffi.cdef("int foobar(void); int foobaz;")
+        ffi.cdef("int foobar(void); extern int foobaz;")
         lib = ffi.dlopen(lib_m)
         ffi.dlclose(lib)
         e = py.test.raises(ValueError, getattr, lib, 'foobar')
@@ -518,3 +533,16 @@
         assert str(e.value).startswith("library '")
         assert str(e.value).endswith("' has already been closed")
         ffi.dlclose(lib)    # does not raise
+
+    def test_passing_large_list(self):
+        if self.Backend is CTypesBackend:
+            py.test.skip("the ctypes backend doesn't support this")
+        ffi = FFI(backend=self.Backend())
+        ffi.cdef("""
+            void getenv(char *);
+        """)
+        needs_dlopen_none()
+        m = ffi.dlopen(None)
+        arg = [b"F", b"O", b"O"] + [b"\x00"] * 20000000
+        x = m.getenv(arg)
+        assert x is None
diff --git a/testing/cffi0/test_ownlib.py b/testing/cffi0/test_ownlib.py
index a06df20..ffad879 100644
--- a/testing/cffi0/test_ownlib.py
+++ b/testing/cffi0/test_ownlib.py
@@ -35,6 +35,10 @@
     long bottom;
 } RECT;
 
+typedef struct {
+    unsigned char a, b, c;
+} THREEBYTES;
+
 
 EXPORT int PointInRect(RECT *prc, POINT pt)
 {
@@ -107,6 +111,15 @@
 {
     r.left = r.right = r.top = r.bottom = 500;
 }
+
+EXPORT THREEBYTES return_three_bytes(void)
+{
+    THREEBYTES result;
+    result.a = 12;
+    result.b = 34;
+    result.c = 56;
+    return result;
+}
 """
 
 class TestOwnLib(object):
@@ -201,7 +214,7 @@
             py.test.skip("fix the auto-generation of the tiny test lib")
         ffi = FFI(backend=self.Backend())
         ffi.cdef("""
-            int my_array[7];
+            extern int my_array[7];
         """)
         ownlib = ffi.dlopen(self.module)
         for i in range(7):
@@ -223,7 +236,7 @@
             py.test.skip("not supported by the ctypes backend")
         ffi = FFI(backend=self.Backend())
         ffi.cdef("""
-            int my_array[];
+            extern int my_array[];
         """)
         ownlib = ffi.dlopen(self.module)
         for i in range(7):
@@ -291,7 +304,7 @@
                 long bottom;
             } RECT;
             
-            long left, top, right, bottom;
+            extern long left, top, right, bottom;
 
             RECT ReturnRect(int i, RECT ar, RECT* br, POINT cp, RECT dr,
                         RECT *er, POINT fp, RECT gr);
@@ -321,7 +334,7 @@
         if self.Backend is CTypesBackend:
             py.test.skip("not implemented with the ctypes backend")
         ffi = FFI(backend=self.Backend())
-        ffi.cdef("long left; int test_getting_errno(void);")
+        ffi.cdef("extern long left; int test_getting_errno(void);")
         lib = ffi.dlopen(self.module)
         lib.left = 123456
         p = ffi.addressof(lib, "left")
@@ -371,3 +384,48 @@
         assert s.top == 22
         assert s.right == 33
         assert s.bottom == 44
+
+    def test_dlopen_handle(self):
+        if self.module is None:
+            py.test.skip("fix the auto-generation of the tiny test lib")
+        if sys.platform == 'win32':
+            py.test.skip("uses 'dl' explicitly")
+        if self.__class__.Backend is CTypesBackend:
+            py.test.skip("not for the ctypes backend")
+        backend = self.Backend()
+        ffi1 = FFI(backend=backend)
+        ffi1.cdef("""void *dlopen(const char *filename, int flags);
+                     int dlclose(void *handle);""")
+        lib1 = ffi1.dlopen('dl')
+        handle = lib1.dlopen(self.module.encode(sys.getfilesystemencoding()),
+                             backend.RTLD_LAZY)
+        assert ffi1.typeof(handle) == ffi1.typeof("void *")
+        assert handle
+
+        ffi = FFI(backend=backend)
+        ffi.cdef("""unsigned short foo_2bytes(unsigned short a);""")
+        lib = ffi.dlopen(handle)
+        x = lib.foo_2bytes(1000)
+        assert x == 1042
+
+        err = lib1.dlclose(handle)
+        assert err == 0
+
+    def test_return_three_bytes(self):
+        if self.module is None:
+            py.test.skip("fix the auto-generation of the tiny test lib")
+        if self.__class__.Backend is CTypesBackend:
+            py.test.skip("not working on win32 on the ctypes backend")
+        ffi = FFI(backend=self.Backend())
+        ffi.cdef("""
+            typedef struct {
+                unsigned char a, b, c;
+            } THREEBYTES;
+
+            THREEBYTES return_three_bytes(void);
+        """)
+        lib = ffi.dlopen(self.module)
+        tb = lib.return_three_bytes()
+        assert tb.a == 12
+        assert tb.b == 34
+        assert tb.c == 56
diff --git a/testing/cffi0/test_parsing.py b/testing/cffi0/test_parsing.py
index 2d75850..a5e4587 100644
--- a/testing/cffi0/test_parsing.py
+++ b/testing/cffi0/test_parsing.py
@@ -174,7 +174,7 @@
         double // blah \\
                   more comments
         x(void);
-        double // blah\\\\
+        double // blah // blah\\\\
         y(void);
         double // blah\\ \
                   etc
@@ -185,6 +185,93 @@
     m.y
     m.z
 
+def test_dont_remove_comment_in_line_directives():
+    ffi = FFI(backend=FakeBackend())
+    e = py.test.raises(CDefError, ffi.cdef, """
+        \t # \t line \t 8 \t "baz.c" \t
+
+        some syntax error here
+    """)
+    assert str(e.value) == "parse error\nbaz.c:9:14: before: syntax"
+    #
+    e = py.test.raises(CDefError, ffi.cdef, """
+        #line 7 "foo//bar.c"
+
+        some syntax error here
+    """)
+    #
+    assert str(e.value) == "parse error\nfoo//bar.c:8:14: before: syntax"
+    ffi = FFI(backend=FakeBackend())
+    e = py.test.raises(CDefError, ffi.cdef, """
+        \t # \t 8 \t "baz.c" \t
+
+        some syntax error here
+    """)
+    assert str(e.value) == "parse error\nbaz.c:9:14: before: syntax"
+    #
+    e = py.test.raises(CDefError, ffi.cdef, """
+        # 7 "foo//bar.c"
+
+        some syntax error here
+    """)
+    assert str(e.value) == "parse error\nfoo//bar.c:8:14: before: syntax"
+
+def test_multiple_line_directives():
+    ffi = FFI(backend=FakeBackend())
+    e = py.test.raises(CDefError, ffi.cdef,
+    """ #line 5 "foo.c"
+        extern int xx;
+        #line 6 "bar.c"
+        extern int yy;
+        #line 7 "baz.c"
+        some syntax error here
+        #line 8 "yadda.c"
+        extern int zz;
+    """)
+    assert str(e.value) == "parse error\nbaz.c:7:14: before: syntax"
+    #
+    e = py.test.raises(CDefError, ffi.cdef,
+    """ # 5 "foo.c"
+        extern int xx;
+        # 6 "bar.c"
+        extern int yy;
+        # 7 "baz.c"
+        some syntax error here
+        # 8 "yadda.c"
+        extern int zz;
+    """)
+    assert str(e.value) == "parse error\nbaz.c:7:14: before: syntax"
+
+def test_commented_line_directive():
+    ffi = FFI(backend=FakeBackend())
+    e = py.test.raises(CDefError, ffi.cdef, """
+        /*
+        #line 5 "foo.c"
+        */
+        void xx(void);
+
+        #line 6 "bar.c"
+        /*
+        #line 35 "foo.c"
+        */
+        some syntax error
+    """)
+    #
+    assert str(e.value) == "parse error\nbar.c:9:14: before: syntax"
+    e = py.test.raises(CDefError, ffi.cdef, """
+        /*
+        # 5 "foo.c"
+        */
+        void xx(void);
+
+        # 6 "bar.c"
+        /*
+        # 35 "foo.c"
+        */
+        some syntax error
+    """)
+    assert str(e.value) == "parse error\nbar.c:9:14: before: syntax"
+
 def test_line_continuation_in_defines():
     ffi = FFI(backend=FakeBackend())
     ffi.cdef("""
@@ -324,6 +411,7 @@
     assert value == sys.maxsize * 2 - 40
 
 def test__is_constant_globalvar():
+    import warnings
     for input, expected_output in [
         ("int a;",          False),
         ("const int a;",    True),
@@ -341,10 +429,13 @@
         ("const int a[5][6];", False),
         ]:
         ffi = FFI()
-        ffi.cdef(input)
+        with warnings.catch_warnings(record=True) as log:
+            warnings.simplefilter("always")
+            ffi.cdef(input)
         declarations = ffi._parser._declarations
         assert ('constant a' in declarations) == expected_output
         assert ('variable a' in declarations) == (not expected_output)
+        assert len(log) == (1 - expected_output)
 
 def test_restrict():
     from cffi import model
@@ -354,7 +445,7 @@
         ("int *a;",            False),
         ]:
         ffi = FFI()
-        ffi.cdef(input)
+        ffi.cdef("extern " + input)
         tp, quals = ffi._parser._declarations['variable a']
         assert bool(quals & model.Q_RESTRICT) == expected_output
 
@@ -409,7 +500,17 @@
 def test_enum():
     ffi = FFI()
     ffi.cdef("""
-        enum Enum { POS = +1, TWO = 2, NIL = 0, NEG = -1, OP = (POS+TWO)-1};
+        enum Enum {
+            POS = +1,
+            TWO = 2,
+            NIL = 0,
+            NEG = -1,
+            ADDSUB = (POS+TWO)-1,
+            DIVMULINT = (3 * 3) / 2,
+            SHIFT = (1 << 3) >> 1,
+            BINOPS = (0x7 & 0x1) | 0x8,
+            XOR = 0xf ^ 0xa
+        };
         """)
     needs_dlopen_none()
     C = ffi.dlopen(None)
@@ -417,7 +518,11 @@
     assert C.TWO == 2
     assert C.NIL == 0
     assert C.NEG == -1
-    assert C.OP == 2
+    assert C.ADDSUB == 2
+    assert C.DIVMULINT == 4
+    assert C.SHIFT == 4
+    assert C.BINOPS == 0b1001
+    assert C.XOR == 0b0101
 
 def test_stdcall():
     ffi = FFI()
@@ -466,3 +571,40 @@
     e = py.test.raises(CDefError, ffi.cdef, 'void foo(void) {}')
     assert str(e.value) == ('<cdef source string>:1: unexpected <FuncDef>: '
                             'this construct is valid C but not valid in cdef()')
+
+def test_unsigned_int_suffix_for_constant():
+    ffi = FFI()
+    ffi.cdef("""enum e {
+                    bin_0=0b10,
+                    bin_1=0b10u,
+                    bin_2=0b10U,
+                    bin_3=0b10l,
+                    bin_4=0b10L,
+                    bin_5=0b10ll,
+                    bin_6=0b10LL,
+                    oct_0=010,
+                    oct_1=010u,
+                    oct_2=010U,
+                    oct_3=010l,
+                    oct_4=010L,
+                    oct_5=010ll,
+                    oct_6=010LL,
+                    dec_0=10,
+                    dec_1=10u,
+                    dec_2=10U,
+                    dec_3=10l,
+                    dec_4=10L,
+                    dec_5=10ll,
+                    dec_6=10LL,
+                    hex_0=0x10,
+                    hex_1=0x10u,
+                    hex_2=0x10U,
+                    hex_3=0x10l,
+                    hex_4=0x10L,
+                    hex_5=0x10ll,
+                    hex_6=0x10LL,};""")
+    needs_dlopen_none()
+    C = ffi.dlopen(None)
+    for base, expected_result in (('bin', 2), ('oct', 8), ('dec', 10), ('hex', 16)):
+        for index in range(7):
+            assert getattr(C, '{base}_{index}'.format(base=base, index=index)) == expected_result
diff --git a/testing/cffi0/test_verify.py b/testing/cffi0/test_verify.py
index 79e1c6c..3a1c0b9 100644
--- a/testing/cffi0/test_verify.py
+++ b/testing/cffi0/test_verify.py
@@ -1,7 +1,9 @@
 import py, re
+import pytest
 import sys, os, math, weakref
 from cffi import FFI, VerificationError, VerificationMissing, model, FFIError
 from testing.support import *
+from testing.support import extra_compile_args
 
 
 lib_m = ['m']
@@ -12,16 +14,6 @@
         lib_m = ['msvcrt']
     pass      # no obvious -Werror equivalent on MSVC
 else:
-    if (sys.platform == 'darwin' and
-          [int(x) for x in os.uname()[2].split('.')] >= [11, 0, 0]):
-        # assume a standard clang or gcc
-        extra_compile_args = ['-Werror', '-Wall', '-Wextra', '-Wconversion']
-        # special things for clang
-        extra_compile_args.append('-Qunused-arguments')
-    else:
-        # assume a standard gcc
-        extra_compile_args = ['-Werror', '-Wall', '-Wextra', '-Wconversion']
-
     class FFI(FFI):
         def verify(self, *args, **kwds):
             return super(FFI, self).verify(
@@ -284,7 +276,7 @@
 def test_var_signed_integer_types():
     ffi = FFI()
     lst = all_signed_integer_types(ffi)
-    csource = "\n".join(["%s somevar_%s;" % (tp, tp.replace(' ', '_'))
+    csource = "\n".join(["static %s somevar_%s;" % (tp, tp.replace(' ', '_'))
                          for tp in lst])
     ffi.cdef(csource)
     lib = ffi.verify(csource)
@@ -303,7 +295,7 @@
 def test_var_unsigned_integer_types():
     ffi = FFI()
     lst = all_unsigned_integer_types(ffi)
-    csource = "\n".join(["%s somevar_%s;" % (tp, tp.replace(' ', '_'))
+    csource = "\n".join(["static %s somevar_%s;" % (tp, tp.replace(' ', '_'))
                          for tp in lst])
     ffi.cdef(csource)
     lib = ffi.verify(csource)
@@ -589,7 +581,8 @@
     assert ffi.sizeof('struct foo_s') == 19 * ffi.sizeof('int')
     s = ffi.new("struct foo_s *")
     assert ffi.sizeof(s.a) == 17 * ffi.sizeof('int')
-    py.test.raises(IndexError, 's.a[17]')
+    with pytest.raises(IndexError):
+        s.a[17]
 
 def test_struct_array_c99_1():
     if sys.platform == 'win32':
@@ -647,7 +640,8 @@
     ffi.verify("struct foo_s { int a:2, b:3; };")
     s = ffi.new("struct foo_s *")
     s.b = 3
-    py.test.raises(OverflowError, "s.b = 4")
+    with pytest.raises(OverflowError):
+        s.b = 4
     assert s.b == 3
 
 def test_struct_with_bitfield_enum():
@@ -813,8 +807,8 @@
 
 def test_access_variable():
     ffi = FFI()
-    ffi.cdef("int foo(void);\n"
-             "int somenumber;")
+    ffi.cdef("static int foo(void);\n"
+             "static int somenumber;")
     lib = ffi.verify("""
         static int somenumber = 2;
         static int foo(void) {
@@ -831,7 +825,7 @@
 def test_access_address_of_variable():
     # access the address of 'somenumber': need a trick
     ffi = FFI()
-    ffi.cdef("int somenumber; static int *const somenumberptr;")
+    ffi.cdef("static int somenumber; static int *const somenumberptr;")
     lib = ffi.verify("""
         static int somenumber = 2;
         #define somenumberptr (&somenumber)
@@ -844,7 +838,7 @@
 def test_access_array_variable(length=5):
     ffi = FFI()
     ffi.cdef("int foo(int);\n"
-             "int somenumber[%s];" % (length,))
+             "static int somenumber[%s];" % (length,))
     lib = ffi.verify("""
         static int somenumber[] = {2, 2, 3, 4, 5};
         static int foo(int i) {
@@ -876,7 +870,7 @@
     ffi = FFI()
     ffi.cdef("struct foo { int x; ...; };\n"
              "int foo(int);\n"
-             "struct foo stuff;")
+             "static struct foo stuff;")
     lib = ffi.verify("""
         struct foo { int x, y, z; };
         static struct foo stuff = {2, 5, 8};
@@ -900,9 +894,9 @@
 
 def test_access_callback():
     ffi = FFI()
-    ffi.cdef("int (*cb)(int);\n"
-             "int foo(int);\n"
-             "void reset_cb(void);")
+    ffi.cdef("static int (*cb)(int);\n"
+             "static int foo(int);\n"
+             "static void reset_cb(void);")
     lib = ffi.verify("""
         static int g(int x) { return x * 7; }
         static int (*cb)(int);
@@ -918,9 +912,9 @@
 def test_access_callback_function_typedef():
     ffi = FFI()
     ffi.cdef("typedef int mycallback_t(int);\n"
-             "mycallback_t *cb;\n"
-             "int foo(int);\n"
-             "void reset_cb(void);")
+             "static mycallback_t *cb;\n"
+             "static int foo(int);\n"
+             "static void reset_cb(void);")
     lib = ffi.verify("""
         static int g(int x) { return x * 7; }
         static int (*cb)(int);
@@ -1070,7 +1064,7 @@
 def test_autofilled_struct_as_argument_dynamic():
     ffi = FFI()
     ffi.cdef("struct foo_s { long a; ...; };\n"
-             "int (*foo)(struct foo_s);")
+             "static int (*foo)(struct foo_s);")
     lib = ffi.verify("""
         struct foo_s {
             double b;
@@ -1079,7 +1073,7 @@
         int foo1(struct foo_s s) {
             return (int)s.a - (int)s.b;
         }
-        int (*foo)(struct foo_s s) = &foo1;
+        static int (*foo)(struct foo_s s) = &foo1;
     """)
     e = py.test.raises(NotImplementedError, lib.foo, "?")
     msg = ("ctype 'struct foo_s' not supported as argument.  It is a struct "
@@ -1449,7 +1443,7 @@
         py.test.skip("_Bool not in MSVC")
     ffi = FFI()
     ffi.cdef("struct foo_s { _Bool x; };"
-             "_Bool foo(_Bool); _Bool (*foop)(_Bool);")
+             "_Bool foo(_Bool); static _Bool (*foop)(_Bool);")
     lib = ffi.verify("""
         struct foo_s { _Bool x; };
         int foo(int arg) {
@@ -1458,13 +1452,15 @@
         _Bool _foofunc(_Bool x) {
             return !x;
         }
-        _Bool (*foop)(_Bool) = _foofunc;
+        static _Bool (*foop)(_Bool) = _foofunc;
     """)
     p = ffi.new("struct foo_s *")
     p.x = 1
     assert p.x is True
-    py.test.raises(OverflowError, "p.x = -1")
-    py.test.raises(TypeError, "p.x = 0.0")
+    with pytest.raises(OverflowError):
+        p.x = -1
+    with pytest.raises(TypeError):
+        p.x = 0.0
     assert lib.foop(1) is False
     assert lib.foop(True) is False
     assert lib.foop(0) is True
@@ -1532,7 +1528,8 @@
                 }
             """ % (type, type))
             p = ffi.new("struct foo_s *")
-            py.test.raises(TypeError, "p.x = 0.0")
+            with pytest.raises(TypeError):
+                p.x = 0.0
             assert lib.foo(42) == 0
             assert lib.foo(0) == 1
             py.test.raises(TypeError, lib.foo, 0.0)
@@ -1646,7 +1643,7 @@
 
 def test_FILE_stored_explicitly():
     ffi = FFI()
-    ffi.cdef("int myprintf11(const char *, int); FILE *myfile;")
+    ffi.cdef("int myprintf11(const char *, int); extern FILE *myfile;")
     lib = ffi.verify("""
         #include <stdio.h>
         FILE *myfile;
@@ -1672,19 +1669,19 @@
 
 def test_global_array_with_missing_length():
     ffi = FFI()
-    ffi.cdef("int fooarray[];")
+    ffi.cdef("extern int fooarray[];")
     lib = ffi.verify("int fooarray[50];")
     assert repr(lib.fooarray).startswith("<cdata 'int *'")
 
 def test_global_array_with_dotdotdot_length():
     ffi = FFI()
-    ffi.cdef("int fooarray[...];")
+    ffi.cdef("extern int fooarray[...];")
     lib = ffi.verify("int fooarray[50];")
     assert repr(lib.fooarray).startswith("<cdata 'int[50]'")
 
 def test_bad_global_array_with_dotdotdot_length():
     ffi = FFI()
-    ffi.cdef("int fooarray[...];")
+    ffi.cdef("extern int fooarray[...];")
     py.test.raises(VerificationError, ffi.verify, "char fooarray[23];")
 
 def test_struct_containing_struct():
@@ -1805,7 +1802,7 @@
 def test_callback_indirection():
     ffi = FFI()
     ffi.cdef("""
-        int (*python_callback)(int how_many, int *values);
+        static int (*python_callback)(int how_many, int *values);
         int (*const c_callback)(int,...);   /* pass this ptr to C routines */
         int some_c_function(int(*cb)(int,...));
     """)
@@ -1939,24 +1936,24 @@
 
 def test_bug_const_char_ptr_array_1():
     ffi = FFI()
-    ffi.cdef("""const char *a[...];""")
+    ffi.cdef("""extern const char *a[...];""")
     lib = ffi.verify("""const char *a[5];""")
     assert repr(ffi.typeof(lib.a)) == "<ctype 'char *[5]'>"
 
 def test_bug_const_char_ptr_array_2():
     from cffi import FFI     # ignore warnings
     ffi = FFI()
-    ffi.cdef("""const int a[];""")
+    ffi.cdef("""extern const int a[];""")
     lib = ffi.verify("""const int a[5];""")
     assert repr(ffi.typeof(lib.a)) == "<ctype 'int *'>"
 
 def _test_various_calls(force_libffi):
     cdef_source = """
-    int xvalue;
-    long long ivalue, rvalue;
-    float fvalue;
-    double dvalue;
-    long double Dvalue;
+    extern int xvalue;
+    extern long long ivalue, rvalue;
+    extern float fvalue;
+    extern double dvalue;
+    extern long double Dvalue;
     signed char tf_bb(signed char x, signed char c);
     unsigned char tf_bB(signed char x, unsigned char c);
     short tf_bh(signed char x, short c);
@@ -2098,6 +2095,11 @@
             raise errors[0][1]
 
 def test_errno_working_even_with_pypys_jit():
+    # NOTE: on some platforms, to work correctly, this test needs to be
+    # compiled with -pthread.  Otherwise, the accesses to errno done from f()
+    # are compiled by assuming this small library won't be used from multiple
+    # threads, which is wrong.  If you see failures _and_ if you pass your
+    # own CFLAGS environment variable, please make sure "-pthread" is in it.
     ffi = FFI()
     ffi.cdef("int f(int);")
     lib = ffi.verify("""
@@ -2135,7 +2137,7 @@
     # exported symbols as well.  So we must not export a simple name
     # like 'foo'!
     ffi1 = FFI()
-    ffi1.cdef("int foo_verify_dlopen_flags;")
+    ffi1.cdef("extern int foo_verify_dlopen_flags;")
 
     lib1 = ffi1.verify("int foo_verify_dlopen_flags;",
                        flags=ffi1.RTLD_GLOBAL | ffi1.RTLD_LAZY)
@@ -2149,7 +2151,7 @@
 def get_second_lib():
     # Hack, using modulename makes the test fail
     ffi2 = FFI()
-    ffi2.cdef("int foo_verify_dlopen_flags;")
+    ffi2.cdef("extern int foo_verify_dlopen_flags;")
     lib2 = ffi2.verify("int foo_verify_dlopen_flags;",
                        flags=ffi2.RTLD_GLOBAL | ffi2.RTLD_LAZY)
     return lib2
@@ -2522,3 +2524,39 @@
         x.p = p
         x.cyclic = x
         del p, x
+
+def test_arithmetic_in_cdef():
+    for a in [0, 11, 15]:
+        ffi = FFI()
+        ffi.cdef("""
+            enum FOO {
+                DIVNN = ((-?) / (-3)),
+                DIVNP = ((-?) / (+3)),
+                DIVPN = ((+?) / (-3)),
+                MODNN = ((-?) % (-3)),
+                MODNP = ((-?) % (+3)),
+                MODPN = ((+?) % (-3)),
+                };
+        """.replace('?', str(a)))
+        lib = ffi.verify("""
+            enum FOO {
+                DIVNN = ((-?) / (-3)),
+                DIVNP = ((-?) / (+3)),
+                DIVPN = ((+?) / (-3)),
+                MODNN = ((-?) % (-3)),
+                MODNP = ((-?) % (+3)),
+                MODPN = ((+?) % (-3)),
+                };
+        """.replace('?', str(a)))
+        # verify() fails if the enum values in the compiled C code differ from
+        # the values that cffi computed by itself from the cdef()
+
+def test_passing_large_list():
+    ffi = FFI()
+    ffi.cdef("""void passing_large_list(long[]);""")
+    lib = ffi.verify("""
+        static void passing_large_list(long a[]) { }
+    """)
+    arg = list(range(20000000))
+    lib.passing_large_list(arg)
+    # the test passes if the call above did not segfault
diff --git a/testing/cffi0/test_version.py b/testing/cffi0/test_version.py
index 9325685..facb84c 100644
--- a/testing/cffi0/test_version.py
+++ b/testing/cffi0/test_version.py
@@ -18,6 +18,7 @@
     version_info = '.'.join(str(i) for i in cffi.__version_info__)
     version_info = version_info.replace('.beta.', 'b')
     version_info = version_info.replace('.plus', '+')
+    version_info = version_info.replace('.rc', 'rc')
     assert v == version_info
     #v = BACKEND_VERSIONS.get(v, v)
     assert v == _cffi_backend.__version__
@@ -36,7 +37,12 @@
     v = cffi.__version__.replace('+', '')
     p = os.path.join(parent, 'doc', 'source', 'installation.rst')
     content = open(p).read()
-    assert (" package version %s:" % v) in content
+    if " package version %s:" % v not in content:
+        for i in range(5):
+            if " package version %s-%d:" % (v, i) in content:
+                break
+        else:
+            assert 0, "doc/source/installation.rst needs updating"
 
 def test_setup_version():
     parent = os.path.dirname(os.path.dirname(cffi.__file__))
diff --git a/testing/cffi0/test_zdistutils.py b/testing/cffi0/test_zdistutils.py
index b67b105..35b3d0c 100644
--- a/testing/cffi0/test_zdistutils.py
+++ b/testing/cffi0/test_zdistutils.py
@@ -89,7 +89,7 @@
         csrc = '/*hi there %s!2*/\n#include <math.h>\n' % self
         v = Verifier(ffi, csrc, force_generic_engine=self.generic,
                      libraries=[self.lib_m])
-        basename = self.__class__.__name__ + 'test_compile_module'
+        basename = self.__class__.__name__[:10] + '_test_compile_module'
         v.modulefilename = filename = str(udir.join(basename + '.so'))
         v.compile_module()
         assert filename == v.modulefilename
diff --git a/testing/cffi0/test_zintegration.py b/testing/cffi0/test_zintegration.py
index d56dac2..ce925b8 100644
--- a/testing/cffi0/test_zintegration.py
+++ b/testing/cffi0/test_zintegration.py
@@ -1,11 +1,13 @@
 import py, os, sys, shutil
 import subprocess
 from testing.udir import udir
+import pytest
 
 if sys.platform == 'win32':
-    py.test.skip('snippets do not run on win32')
+    pytestmark = pytest.mark.skip('snippets do not run on win32')
 if sys.version_info < (2, 7):
-    py.test.skip('fails e.g. on a Debian/Ubuntu which patches virtualenv'
+    pytestmark = pytest.mark.skip(
+                 'fails e.g. on a Debian/Ubuntu which patches virtualenv'
                  ' in a non-2.6-friendly way')
 
 def create_venv(name):
@@ -75,7 +77,10 @@
         env = os.environ.copy()
         env['PYTHONPATH'] = paths
         subprocess.check_call((vp, 'setup.py', 'clean'), env=env)
-        subprocess.check_call((vp, 'setup.py', 'install'), env=env)
+        # There is a setuptools/easy_install bug that makes 'setup.py install' fail when the build and install
+        # happen together from inside the build directory (it tries to look up its own dependencies on PyPI).
+        # Subsequent runs will succeed because this test doesn't properly clean up the build; use pip for now.
+        subprocess.check_call((vp, '-m', 'pip', 'install', '.'), env=env)
         subprocess.check_call((vp, str(python_f)), env=env)
     finally:
         os.chdir(olddir)
diff --git a/testing/cffi1/test_cffi_binary.py b/testing/cffi1/test_cffi_binary.py
index 25953db..7cfbace 100644
--- a/testing/cffi1/test_cffi_binary.py
+++ b/testing/cffi1/test_cffi_binary.py
@@ -10,6 +10,8 @@
     for line in g:
         if not line.startswith('0'):
             continue
+        if line[line.find(' ') + 1] == 'l':
+            continue
         if '*UND*' in line:
             continue
         name = line.split()[-1]
diff --git a/testing/cffi1/test_dlopen.py b/testing/cffi1/test_dlopen.py
index 1c20550..26a2717 100644
--- a/testing/cffi1/test_dlopen.py
+++ b/testing/cffi1/test_dlopen.py
@@ -6,7 +6,7 @@
 
 def test_simple():
     ffi = FFI()
-    ffi.cdef("int close(int); static const int BB = 42; int somevar;")
+    ffi.cdef("int close(int); static const int BB = 42; extern int somevar;")
     target = udir.join('test_simple.py')
     make_py_source(ffi, 'test_simple', str(target))
     assert target.read() == r"""# auto-generated file
@@ -196,7 +196,7 @@
 
 def test_global_var():
     ffi = FFI()
-    ffi.cdef("int myglob;")
+    ffi.cdef("extern int myglob;")
     target = udir.join('test_global_var.py')
     make_py_source(ffi, 'test_global_var', str(target))
     assert target.read() == r"""# auto-generated file
diff --git a/testing/cffi1/test_ffi_obj.py b/testing/cffi1/test_ffi_obj.py
index e07d6f9..0d29290 100644
--- a/testing/cffi1/test_ffi_obj.py
+++ b/testing/cffi1/test_ffi_obj.py
@@ -1,4 +1,5 @@
 import py, sys
+import pytest
 import _cffi_backend as _cffi1_backend
 
 
@@ -85,9 +86,12 @@
 
 def test_ffi_no_attr():
     ffi = _cffi1_backend.FFI()
-    py.test.raises(AttributeError, "ffi.no_such_name")
-    py.test.raises(AttributeError, "ffi.no_such_name = 42")
-    py.test.raises(AttributeError, "del ffi.no_such_name")
+    with pytest.raises(AttributeError):
+        ffi.no_such_name
+    with pytest.raises(AttributeError):
+        ffi.no_such_name = 42
+    with pytest.raises(AttributeError):
+        del ffi.no_such_name
 
 def test_ffi_string():
     ffi = _cffi1_backend.FFI()
diff --git a/testing/cffi1/test_function_args.py b/testing/cffi1/test_function_args.py
new file mode 100644
index 0000000..30c6fed
--- /dev/null
+++ b/testing/cffi1/test_function_args.py
@@ -0,0 +1,208 @@
+import pytest, sys
+try:
+    # Comment out the following 'if' statement to run this test on non-Windows platforms.
+    # The latest libffi failure found on x86-64 Linux: https://github.com/libffi/libffi/issues/574
+    if sys.platform != 'win32':
+        raise ImportError("this test is skipped because it keeps finding "
+                          "failures in libffi, instead of cffi")
+
+    from hypothesis import given, settings, example
+    from hypothesis import strategies as st
+except ImportError as e:
+    e1 = e
+    def test_types():
+        pytest.skip(str(e1))
+else:
+
+    from cffi import FFI
+    import sys, random
+    from .test_recompiler import verify
+
+    ALL_PRIMITIVES = [
+        'unsigned char',
+        'short',
+        'int',
+        'long',
+        'long long',
+        'float',
+        'double',
+        #'long double',   --- on x86 it can give libffi crashes
+    ]
+    def _make_struct(s):
+        return st.lists(s, min_size=1)   # a struct is a non-empty list of field types
+    types = st.one_of(st.sampled_from(ALL_PRIMITIVES),
+                      _make_struct(st.sampled_from(ALL_PRIMITIVES)))
+    # NB. 'types' could be st.recursive instead, but it doesn't
+    # really seem useful
+
+    def draw_primitive(ffi, typename):
+        # 'long double' stays a plain Python float; other types go through ffi.cast()
+        raw = random.random() * 2**40
+        if typename == 'long double':
+            return raw
+        return ffi.cast(typename, raw)
+
+    TEST_RUN_COUNTER = 0
+
+
+    @given(st.lists(types), types)
+    @settings(max_examples=100, deadline=5000)   # 5000ms
+    def test_types(tp_args, tp_result):
+        global TEST_RUN_COUNTER
+        print(tp_args, tp_result)
+        cdefs = []
+        structs = {}
+
+        def build_type(tp):
+            if type(tp) is list:
+                field_types = [build_type(tp1) for tp1 in tp]
+                fields = ['%s f%d;' % (ftp, j)
+                          for (j, ftp) in enumerate(field_types)]
+                fields = '\n    '.join(fields)
+                name = 's%d' % len(cdefs)
+                cdefs.append("typedef struct {\n    %s\n} %s;" % (fields, name))
+                structs[name] = field_types
+                return name
+            else:
+                return tp
+
+        args = [build_type(tp) for tp in tp_args]
+        result = build_type(tp_result)
+
+        TEST_RUN_COUNTER += 1
+        signature = "%s testfargs(%s)" % (result,
+            ', '.join(['%s a%d' % (arg, i) for (i, arg) in enumerate(args)])
+            or 'void')
+
+        source = list(cdefs)
+
+        cdefs.append("%s;" % signature)
+        cdefs.append("extern %s testfargs_result;" % result)
+        for i, arg in enumerate(args):
+            cdefs.append("extern %s testfargs_arg%d;" % (arg, i))
+        source.append("%s testfargs_result;" % result)
+        for i, arg in enumerate(args):
+            source.append("%s testfargs_arg%d;" % (arg, i))
+        source.append(signature)
+        source.append("{")
+        for i, arg in enumerate(args):
+            source.append("    testfargs_arg%d = a%d;" % (i, i))
+        source.append("    return testfargs_result;")
+        source.append("}")
+
+        typedef_line = "typedef %s;" % (signature.replace('testfargs',
+                                                          '(*mycallback_t)'),)
+        assert signature.endswith(')')
+        sig_callback = "%s testfcallback(mycallback_t callback)" % result
+        cdefs.append(typedef_line)
+        cdefs.append("%s;" % sig_callback)
+        source.append(typedef_line)
+        source.append(sig_callback)
+        source.append("{")
+        source.append("    return callback(%s);" %
+                ', '.join(["testfargs_arg%d" % i for i in range(len(args))]))
+        source.append("}")
+
+        ffi = FFI()
+        ffi.cdef("\n".join(cdefs))
+        lib = verify(ffi, 'test_function_args_%d' % TEST_RUN_COUNTER,
+                     "\n".join(source), no_cpp=True)
+
+        # when getting segfaults, enable this:
+        if False:
+            from testing.udir import udir
+            import subprocess
+            f = open(str(udir.join('run1.py')), 'w')
+            f.write('import sys; sys.path = %r\n' % (sys.path,))
+            f.write('from _CFFI_test_function_args_%d import ffi, lib\n' %
+                    TEST_RUN_COUNTER)
+            for i in range(len(args)):
+                f.write('a%d = ffi.new("%s *")\n' % (i, args[i]))
+            aliststr = ', '.join(['a%d[0]' % i for i in range(len(args))])
+            f.write('lib.testfargs(%s)\n' % aliststr)
+            f.write('ffi.addressof(lib, "testfargs")(%s)\n' % aliststr)
+            f.close()
+            print("checking for segfault for direct call...")
+            rc = subprocess.call([sys.executable, 'run1.py'], cwd=str(udir))
+            assert rc == 0, rc
+
+        def make_arg(tp):
+            if tp in structs:
+                return [make_arg(tp1) for tp1 in structs[tp]]
+            else:
+                return draw_primitive(ffi, tp)
+
+        passed_args = [make_arg(arg) for arg in args]
+        returned_value = make_arg(result)
+
+        def write(p, v):
+            if type(v) is list:
+                for i, v1 in enumerate(v):
+                    write(ffi.addressof(p, 'f%d' % i), v1)
+            else:
+                p[0] = v
+
+        write(ffi.addressof(lib, 'testfargs_result'), returned_value)
+
+        ## CALL forcing libffi
+        print("CALL forcing libffi")
+        received_return = ffi.addressof(lib, 'testfargs')(*passed_args)
+        ##
+
+        _tp_long_double = ffi.typeof("long double")
+        def check(p, v):
+            if type(v) is list:
+                for i, v1 in enumerate(v):
+                    check(ffi.addressof(p, 'f%d' % i), v1)
+            else:
+                if ffi.typeof(p).item is _tp_long_double:
+                    assert ffi.cast("double", p[0]) == v
+                else:
+                    assert p[0] == v
+
+        for i, arg in enumerate(passed_args):
+            check(ffi.addressof(lib, 'testfargs_arg%d' % i), arg)
+        ret = ffi.new(result + "*", received_return)
+        check(ret, returned_value)
+
+        ## CALLBACK
+        def expand(value):
+            if isinstance(value, ffi.CData):
+                t = ffi.typeof(value)
+                if t is _tp_long_double:
+                    return float(ffi.cast("double", value))
+                return [expand(getattr(value, 'f%d' % i))
+                        for i in range(len(t.fields))]
+            else:
+                return value
+
+        # when getting segfaults, enable this:
+        if False:
+            from testing.udir import udir
+            import subprocess
+            f = open(str(udir.join('run1.py')), 'w')
+            f.write('import sys; sys.path = %r\n' % (sys.path,))
+            f.write('from _CFFI_test_function_args_%d import ffi, lib\n' %
+                    TEST_RUN_COUNTER)
+            f.write('def callback(*args): return ffi.new("%s *")[0]\n' % result)
+            f.write('fptr = ffi.callback("%s(%s)", callback)\n' % (result,
+                                                                ','.join(args)))
+            f.write('print(lib.testfcallback(fptr))\n')
+            f.close()
+            print("checking for segfault for callback...")
+            rc = subprocess.call([sys.executable, 'run1.py'], cwd=str(udir))
+            assert rc == 0, rc
+
+        seen_args = []
+        def callback(*args):
+            seen_args.append([expand(arg) for arg in args])
+            return returned_value
+
+        fptr = ffi.callback("%s(%s)" % (result, ','.join(args)), callback)
+        print("CALL with callback")
+        received_return = lib.testfcallback(fptr)
+
+        assert len(seen_args) == 1
+        assert passed_args == seen_args[0]
+        ret = ffi.new(result + "*", received_return)
+        check(ret, returned_value)
diff --git a/testing/cffi1/test_new_ffi_1.py b/testing/cffi1/test_new_ffi_1.py
index 209cb30..640830b 100644
--- a/testing/cffi1/test_new_ffi_1.py
+++ b/testing/cffi1/test_new_ffi_1.py
@@ -1,4 +1,5 @@
 import py
+import pytest
 import platform, imp
 import sys, os, ctypes
 import cffi
@@ -186,10 +187,14 @@
         p[9] = 43
         assert p[0] == 42
         assert p[9] == 43
-        py.test.raises(IndexError, "p[10]")
-        py.test.raises(IndexError, "p[10] = 44")
-        py.test.raises(IndexError, "p[-1]")
-        py.test.raises(IndexError, "p[-1] = 44")
+        with pytest.raises(IndexError):
+            p[10]
+        with pytest.raises(IndexError):
+            p[10] = 44
+        with pytest.raises(IndexError):
+            p[-1]
+        with pytest.raises(IndexError):
+            p[-1] = 44
 
     def test_new_array_args(self):
         # this tries to be closer to C: where we say "int x[5] = {10, 20, ..}"
@@ -212,18 +217,21 @@
     def test_new_array_varsize(self):
         p = ffi.new("int[]", 10)     # a single integer is the length
         assert p[9] == 0
-        py.test.raises(IndexError, "p[10]")
+        with pytest.raises(IndexError):
+            p[10]
         #
         py.test.raises(TypeError, ffi.new, "int[]")
         #
         p = ffi.new("int[]", [-6, -7])    # a list is all the items, like C
         assert p[0] == -6
         assert p[1] == -7
-        py.test.raises(IndexError, "p[2]")
+        with pytest.raises(IndexError):
+            p[2]
         assert repr(p) == "<cdata 'int[]' owning %d bytes>" % (2*SIZE_OF_INT)
         #
         p = ffi.new("int[]", 0)
-        py.test.raises(IndexError, "p[0]")
+        with pytest.raises(IndexError):
+            p[0]
         py.test.raises(ValueError, ffi.new, "int[]", -1)
         assert repr(p) == "<cdata 'int[]' owning 0 bytes>"
 
@@ -324,7 +332,8 @@
         p[2][3] = 33
         assert p[0][0] == 10
         assert p[2][3] == 33
-        py.test.raises(IndexError, "p[1][-1]")
+        with pytest.raises(IndexError):
+            p[1][-1]
 
     def test_constructor_array_of_array(self):
         p = ffi.new("int[3][2]", [[10, 11], [12, 13], [14, 15]])
@@ -445,7 +454,8 @@
         n = ffi.new("int*", 99)
         p = ffi.new("int*[]", [n])
         assert p[0][0] == 99
-        py.test.raises(TypeError, "p[0] = None")
+        with pytest.raises(TypeError):
+            p[0] = None
         p[0] = ffi.NULL
         assert p[0] == ffi.NULL
 
@@ -478,13 +488,15 @@
         assert s.a == s.b == s.c == 0
         s.b = -23
         assert s.b == -23
-        py.test.raises(OverflowError, "s.b = 32768")
+        with pytest.raises(OverflowError):
+            s.b = 32768
         #
         s = ffi.new("struct simple*", [-2, -3])
         assert s.a == -2
         assert s.b == -3
         assert s.c == 0
-        py.test.raises((AttributeError, TypeError), "del s.a")
+        with pytest.raises((AttributeError, TypeError)):
+            del s.a
         assert repr(s) == "<cdata 'struct simple *' owning %d bytes>" % (
             SIZE_OF_INT + 2 * SIZE_OF_SHORT)
         #
@@ -502,8 +514,10 @@
         assert s[0].a == s[0].b == s[0].c == 0
         s[0].b = -23
         assert s[0].b == s.b == -23
-        py.test.raises(OverflowError, "s[0].b = -32769")
-        py.test.raises(IndexError, "s[1]")
+        with pytest.raises(OverflowError):
+            s[0].b = -32769
+        with pytest.raises(IndexError):
+            s[1]
 
     def test_struct_opaque(self):
         py.test.raises(ffi.error, ffi.new, "struct baz*")
@@ -555,11 +569,13 @@
         u.b = -23
         assert u.b == -23
         assert u.a != 0
-        py.test.raises(OverflowError, "u.b = 32768")
+        with pytest.raises(OverflowError):
+            u.b = 32768
         #
         u = ffi.new("union simple_u*", [-2])
         assert u.a == -2
-        py.test.raises((AttributeError, TypeError), "del u.a")
+        with pytest.raises((AttributeError, TypeError)):
+            del u.a
         assert repr(u) == "<cdata 'union simple_u *' owning %d bytes>" % (
             SIZE_OF_INT,)
 
@@ -625,7 +641,8 @@
         p[3] = b'\x00'
         assert ffi.string(p) == b"hel"
         assert ffi.string(p, 2) == b"he"
-        py.test.raises(IndexError, "p[7] = b'X'")
+        with pytest.raises(IndexError):
+            p[7] = b'X'
         #
         a = ffi.new("char[]", b"hello\x00world")
         assert len(a) == 12
@@ -648,7 +665,8 @@
         p[3] = u+'\x00'
         assert ffi.string(p) == u+"hel"
         assert ffi.string(p, 123) == u+"hel"
-        py.test.raises(IndexError, "p[7] = u+'X'")
+        with pytest.raises(IndexError):
+            p[7] = u+'X'
         #
         a = ffi.new("wchar_t[]", u+"hello\x00world")
         assert len(a) == 12
@@ -664,7 +682,8 @@
         s = ffi.new("struct string*", [t])
         assert type(s.name) not in (bytes, str, unicode)
         assert ffi.string(s.name) == b"testing"
-        py.test.raises(TypeError, "s.name = None")
+        with pytest.raises(TypeError):
+            s.name = None
         s.name = ffi.NULL
         assert s.name == ffi.NULL
 
@@ -685,17 +704,20 @@
         a = ffi.new("int[]", [10, 11, 12])
         p = ffi.new("void **", a)
         vp = p[0]
-        py.test.raises(TypeError, "vp[0]")
+        with pytest.raises(TypeError):
+            vp[0]
         py.test.raises(TypeError, ffi.new, "short **", a)
         #
         s = ffi.new("struct voidp *")
         s.p = a    # works
         s.q = a    # works
-        py.test.raises(TypeError, "s.r = a")    # fails
+        with pytest.raises(TypeError):
+            s.r = a    # fails
         b = ffi.cast("int *", a)
         s.p = b    # works
         s.q = b    # works
-        py.test.raises(TypeError, "s.r = b")    # fails
+        with pytest.raises(TypeError):
+            s.r = b    # fails
 
     def test_functionptr_simple(self):
         py.test.raises(TypeError, ffi.callback, "int(*)(int)", 0)
@@ -713,7 +735,8 @@
         q = ffi.new("int(**)(int)", p)
         assert repr(q) == "<cdata 'int(* *)(int)' owning %d bytes>" % (
             SIZE_OF_PTR)
-        py.test.raises(TypeError, "q(43)")
+        with pytest.raises(TypeError):
+            q(43)
         res = q[0](43)
         assert res == 44
         q = ffi.cast("int(*)(int)", p)
@@ -922,10 +945,14 @@
         assert s.e in (4294967295, -1)     # two choices
         assert s[0].e in (4294967295, -1)
         s.e = s.e
-        py.test.raises(TypeError, "s.e = 'B3'")
-        py.test.raises(TypeError, "s.e = '2'")
-        py.test.raises(TypeError, "s.e = '#2'")
-        py.test.raises(TypeError, "s.e = '#7'")
+        with pytest.raises(TypeError):
+            s.e = 'B3'
+        with pytest.raises(TypeError):
+            s.e = '2'
+        with pytest.raises(TypeError):
+            s.e = '#2'
+        with pytest.raises(TypeError):
+            s.e = '#7'
 
     def test_enum_non_contiguous(self):
         # enum noncont { A4, B4=42, C4 };
@@ -947,11 +974,14 @@
 
     def test_array_of_struct(self):
         s = ffi.new("struct ab[1]")
-        py.test.raises(AttributeError, 's.b')
-        py.test.raises(AttributeError, 's.b = 412')
+        with pytest.raises(AttributeError):
+            s.b
+        with pytest.raises(AttributeError):
+            s.b = 412
         s[0].b = 412
         assert s[0].b == 412
-        py.test.raises(IndexError, 's[1]')
+        with pytest.raises(IndexError):
+            s[1]
 
     def test_pointer_to_array(self):
         p = ffi.new("int(**)[5]")
@@ -1000,17 +1030,23 @@
         assert ffi.sizeof("struct bitfield") == 8
         s = ffi.new("struct bitfield *")
         s.a = 511
-        py.test.raises(OverflowError, "s.a = 512")
-        py.test.raises(OverflowError, "s[0].a = 512")
+        with pytest.raises(OverflowError):
+            s.a = 512
+        with pytest.raises(OverflowError):
+            s[0].a = 512
         assert s.a == 511
         s.a = -512
-        py.test.raises(OverflowError, "s.a = -513")
-        py.test.raises(OverflowError, "s[0].a = -513")
+        with pytest.raises(OverflowError):
+            s.a = -513
+        with pytest.raises(OverflowError):
+            s[0].a = -513
         assert s.a == -512
         s.c = 3
         assert s.c == 3
-        py.test.raises(OverflowError, "s.c = 4")
-        py.test.raises(OverflowError, "s[0].c = 4")
+        with pytest.raises(OverflowError):
+            s.c = 4
+        with pytest.raises(OverflowError):
+            s[0].c = 4
         s.c = -4
         assert s.c == -4
 
@@ -1184,7 +1220,7 @@
             py.test.skip(str(e))
         f.write(ffi.buffer(a, 1000 * ffi.sizeof("int")))
         f.seek(0)
-        assert f.read() == array.array('i', range(1000)).tostring()
+        assert f.read() == arraytostring(array.array('i', range(1000)))
         f.seek(0)
         b = ffi.new("int[]", 1005)
         f.readinto(ffi.buffer(b, 1000 * ffi.sizeof("int")))
@@ -1202,7 +1238,7 @@
             py.test.skip(str(e))
         f.write(ffi.buffer(a, 1000 * ffi.sizeof("int")))
         f.seek(0)
-        assert f.read() == array.array('i', range(1000)).tostring()
+        assert f.read() == arraytostring(array.array('i', range(1000)))
         f.seek(0)
         b = ffi.new("int[]", 1005)
         f.readinto(ffi.buffer(b, 1000 * ffi.sizeof("int")))
@@ -1235,7 +1271,8 @@
         p = ffi.new("struct foo_s *", 10)     # a single integer is the length
         assert p.len == 0
         assert p.data[9] == 0
-        py.test.raises(IndexError, "p.data[10]")
+        with pytest.raises(IndexError):
+            p.data[10]
 
     def test_ffi_typeof_getcname(self):
         assert ffi.getctype("int") == "int"
@@ -1742,7 +1779,7 @@
 
     def test_import_from_lib(self):
         ffi2 = cffi.FFI()
-        ffi2.cdef("int myfunc(int); int myvar;\n#define MYFOO ...\n")
+        ffi2.cdef("int myfunc(int); extern int myvar;\n#define MYFOO ...\n")
         outputfilename = recompile(ffi2, "_test_import_from_lib",
                                    "int myfunc(int x) { return x + 1; }\n"
                                    "int myvar = -5;\n"
@@ -1752,7 +1789,8 @@
         assert MYFOO == 42
         assert myfunc(43) == 44
         assert myvar == -5     # but can't be changed, so not very useful
-        py.test.raises(ImportError, "from _test_import_from_lib.lib import bar")
+        with pytest.raises(ImportError):
+            from _test_import_from_lib.lib import bar
         d = {}
         exec("from _test_import_from_lib.lib import *", d)
         assert (set(key for key in d if not key.startswith('_')) ==
diff --git a/testing/cffi1/test_re_python.py b/testing/cffi1/test_re_python.py
index 377c29b..2ae0dd1 100644
--- a/testing/cffi1/test_re_python.py
+++ b/testing/cffi1/test_re_python.py
@@ -63,18 +63,20 @@
     #define BIGNEG -420000000000L
     int add42(int);
     int add43(int, ...);
-    int globalvar42;
+    extern int globalvar42;
     const int globalconst42;
-    const char *const globalconsthello = "hello";
+    const char *const globalconsthello;
     int no_such_function(int);
-    int no_such_globalvar;
+    extern int no_such_globalvar;
     struct foo_s;
     typedef struct bar_s { int x; signed char a[]; } bar_t;
     enum foo_e { AA, BB, CC };
     int strlen(const char *);
     struct with_union { union { int a; char b; }; };
     union with_struct { struct { int a; char b; }; };
+    struct with_struct_with_union { struct { union { int x; }; } cp; };
     struct NVGcolor { union { float rgba[4]; struct { float r,g,b,a; }; }; };
+    typedef struct selfref { struct selfref *next; } *selfref_ptr_t;
     """)
     ffi.set_source('re_python_pysrc', None)
     ffi.emit_python_code(str(tmpdir.join('re_python_pysrc.py')))
@@ -247,6 +249,10 @@
     assert ffi.offsetof("union with_struct", "b") == INT
     assert ffi.sizeof("union with_struct") >= INT + 1
     #
+    assert ffi.sizeof("struct with_struct_with_union") == INT
+    p = ffi.new("struct with_struct_with_union *")
+    assert p.cp.x == 0
+    #
     FLOAT = ffi.sizeof("float")
     assert ffi.sizeof("struct NVGcolor") == FLOAT * 4
     assert ffi.offsetof("struct NVGcolor", "rgba") == 0
@@ -254,3 +260,29 @@
     assert ffi.offsetof("struct NVGcolor", "g") == FLOAT
     assert ffi.offsetof("struct NVGcolor", "b") == FLOAT * 2
     assert ffi.offsetof("struct NVGcolor", "a") == FLOAT * 3
+
+def test_selfref():
+    # based on issue #429
+    from re_python_pysrc import ffi
+    ffi.new("selfref_ptr_t")
+
+def test_dlopen_handle():
+    import _cffi_backend
+    from re_python_pysrc import ffi
+    if sys.platform == 'win32':
+        py.test.skip("uses 'dl' explicitly")
+    ffi1 = FFI()
+    ffi1.cdef("""void *dlopen(const char *filename, int flags);
+                 int dlclose(void *handle);""")
+    lib1 = ffi1.dlopen('dl')
+    handle = lib1.dlopen(extmod.encode(sys.getfilesystemencoding()),
+                         _cffi_backend.RTLD_LAZY)
+    assert ffi1.typeof(handle) == ffi1.typeof("void *")
+    assert handle
+
+    lib = ffi.dlopen(handle)
+    assert lib.add42(-10) == 32
+    assert type(lib.add42) is _cffi_backend.FFI.CData
+
+    err = lib1.dlclose(handle)
+    assert err == 0
diff --git a/testing/cffi1/test_recompiler.py b/testing/cffi1/test_recompiler.py
index 6a31110..fdb4d5a 100644
--- a/testing/cffi1/test_recompiler.py
+++ b/testing/cffi1/test_recompiler.py
@@ -1,5 +1,6 @@
 
 import sys, os, py
+import pytest
 from cffi import FFI, VerificationError, FFIError, CDefError
 from cffi import recompiler
 from testing.udir import udir
@@ -25,16 +26,21 @@
 
 def verify(ffi, module_name, source, *args, **kwds):
     no_cpp = kwds.pop('no_cpp', False)
+    ignore_warnings = kwds.pop('ignore_warnings', False)
     kwds.setdefault('undef_macros', ['NDEBUG'])
     module_name = '_CFFI_' + module_name
     ffi.set_source(module_name, source)
     if not os.environ.get('NO_CPP') and not no_cpp:   # test the .cpp mode too
         kwds.setdefault('source_extension', '.cpp')
         source = 'extern "C" {\n%s\n}' % (source,)
-    elif sys.platform != 'win32':
+    elif sys.platform != 'win32' and not ignore_warnings:
         # add '-Werror' to the existing 'extra_compile_args' flags
+        from testing.support import extra_compile_args
         kwds['extra_compile_args'] = (kwds.get('extra_compile_args', []) +
-                                      ['-Werror'])
+                                      extra_compile_args)
+    if sys.platform == 'darwin':
+        kwds['extra_link_args'] = (kwds.get('extra_link_args', []) +
+                                     ['-stdlib=libc++'])
     return _verify(ffi, module_name, source, *args, **kwds)
 
 def test_set_source_no_slashes():
@@ -82,7 +88,7 @@
                      "(FUNCTION 1)(PRIMITIVE 7)(FUNCTION_END 1)(POINTER 0)")
 
 def test_type_table_array():
-    check_type_table("int a[100];",
+    check_type_table("extern int a[100];",
                      "(PRIMITIVE 7)(ARRAY 0)(None 100)")
 
 def test_type_table_typedef():
@@ -134,7 +140,8 @@
     import math
     ffi = FFI()
     ffi.cdef("float sin(double); double cos(double);")
-    lib = verify(ffi, 'test_math_sin', '#include <math.h>')
+    lib = verify(ffi, 'test_math_sin', '#include <math.h>',
+                 ignore_warnings=True)
     assert lib.cos(1.43) == math.cos(1.43)
 
 def test_repr_lib():
@@ -157,7 +164,7 @@
 
 def test_global_var_array():
     ffi = FFI()
-    ffi.cdef("int a[100];")
+    ffi.cdef("extern int a[100];")
     lib = verify(ffi, 'test_global_var_array', 'int a[100] = { 9999 };')
     lib.a[42] = 123456
     assert lib.a[42] == 123456
@@ -181,27 +188,33 @@
 
 def test_global_var_int():
     ffi = FFI()
-    ffi.cdef("int a, b, c;")
+    ffi.cdef("extern int a, b, c;")
     lib = verify(ffi, 'test_global_var_int', 'int a = 999, b, c;')
     assert lib.a == 999
     lib.a -= 1001
     assert lib.a == -2
     lib.a = -2147483648
     assert lib.a == -2147483648
-    py.test.raises(OverflowError, "lib.a = 2147483648")
-    py.test.raises(OverflowError, "lib.a = -2147483649")
+    with pytest.raises(OverflowError):
+        lib.a = 2147483648
+    with pytest.raises(OverflowError):
+        lib.a = -2147483649
     lib.b = 525      # try with the first access being in setattr, too
     assert lib.b == 525
-    py.test.raises(AttributeError, "del lib.a")
-    py.test.raises(AttributeError, "del lib.c")
-    py.test.raises(AttributeError, "del lib.foobarbaz")
+    with pytest.raises(AttributeError):
+        del lib.a
+    with pytest.raises(AttributeError):
+        del lib.c
+    with pytest.raises(AttributeError):
+        del lib.foobarbaz
 
 def test_macro():
     ffi = FFI()
     ffi.cdef("#define FOOBAR ...")
     lib = verify(ffi, 'test_macro', "#define FOOBAR (-6912)")
     assert lib.FOOBAR == -6912
-    py.test.raises(AttributeError, "lib.FOOBAR = 2")
+    with pytest.raises(AttributeError):
+        lib.FOOBAR = 2
 
 def test_macro_check_value():
     # the value '-0x80000000' in C sources does not have a clear meaning
@@ -247,7 +260,8 @@
     ffi.cdef("static const int FOOBAR;")
     lib = verify(ffi, 'test_constant', "#define FOOBAR (-6912)")
     assert lib.FOOBAR == -6912
-    py.test.raises(AttributeError, "lib.FOOBAR = 2")
+    with pytest.raises(AttributeError):
+        lib.FOOBAR = 2
 
 def test_check_value_of_static_const():
     ffi = FFI()
@@ -263,7 +277,8 @@
     ffi.cdef("static const double FOOBAR;")
     lib = verify(ffi, 'test_constant_nonint', "#define FOOBAR (-6912.5)")
     assert lib.FOOBAR == -6912.5
-    py.test.raises(AttributeError, "lib.FOOBAR = 2")
+    with pytest.raises(AttributeError):
+        lib.FOOBAR = 2
 
 def test_constant_ptr():
     ffi = FFI()
@@ -274,7 +289,7 @@
 
 def test_dir():
     ffi = FFI()
-    ffi.cdef("int ff(int); int aa; static const int my_constant;")
+    ffi.cdef("int ff(int); extern int aa; static const int my_constant;")
     lib = verify(ffi, 'test_dir', """
         #define my_constant  (-45)
         int aa;
@@ -315,8 +330,10 @@
     p = ffi.new("struct foo_s *", {'a': -32768, 'b': -2147483648})
     assert p.a == -32768
     assert p.b == -2147483648
-    py.test.raises(OverflowError, "p.a -= 1")
-    py.test.raises(OverflowError, "p.b -= 1")
+    with pytest.raises(OverflowError):
+        p.a -= 1
+    with pytest.raises(OverflowError):
+        p.b -= 1
     q = ffi.new("struct bar_s *", {'f': p})
     assert q.f == p
     #
@@ -336,9 +353,9 @@
     lib = verify(ffi, 'test_verify_exact_field_offset',
                  """struct foo_s { short a; int b; };""")
     e = py.test.raises(ffi.error, ffi.new, "struct foo_s *", [])    # lazily
-    assert str(e.value) == ("struct foo_s: wrong offset for field 'b' (cdef "
-                       'says 0, but C compiler says 4). fix it or use "...;" '
-                       "in the cdef for struct foo_s to make it flexible")
+    assert str(e.value).startswith(
+        "struct foo_s: wrong offset for field 'b' (cdef "
+        'says 0, but C compiler says 4). fix it or use "...;" ')
 
 def test_type_caching():
     ffi1 = FFI(); ffi1.cdef("struct foo_s;")
@@ -387,19 +404,23 @@
     assert ffi.sizeof("struct foo_s") == (42 + 11) * 4
     p = ffi.new("struct foo_s *")
     assert p.a[41] == p.b[10] == 0
-    py.test.raises(IndexError, "p.a[42]")
-    py.test.raises(IndexError, "p.b[11]")
+    with pytest.raises(IndexError):
+        p.a[42]
+    with pytest.raises(IndexError):
+        p.b[11]
 
 def test_dotdotdot_global_array():
     ffi = FFI()
-    ffi.cdef("int aa[...]; int bb[...];")
+    ffi.cdef("extern int aa[...]; extern int bb[...];")
     lib = verify(ffi, 'test_dotdotdot_global_array',
                  "int aa[41]; int bb[12];")
     assert ffi.sizeof(lib.aa) == 41 * 4
     assert ffi.sizeof(lib.bb) == 12 * 4
     assert lib.aa[40] == lib.bb[11] == 0
-    py.test.raises(IndexError, "lib.aa[41]")
-    py.test.raises(IndexError, "lib.bb[12]")
+    with pytest.raises(IndexError):
+        lib.aa[41]
+    with pytest.raises(IndexError):
+        lib.bb[12]
 
 def test_misdeclared_field_1():
     ffi = FFI()
@@ -545,37 +566,37 @@
 
 def test_bad_size_of_global_1():
     ffi = FFI()
-    ffi.cdef("short glob;")
+    ffi.cdef("extern short glob;")
     py.test.raises(VerificationError, verify, ffi,
                    "test_bad_size_of_global_1", "long glob;")
 
 def test_bad_size_of_global_2():
     ffi = FFI()
-    ffi.cdef("int glob[10];")
+    ffi.cdef("extern int glob[10];")
     py.test.raises(VerificationError, verify, ffi,
                    "test_bad_size_of_global_2", "int glob[9];")
 
 def test_unspecified_size_of_global_1():
     ffi = FFI()
-    ffi.cdef("int glob[];")
+    ffi.cdef("extern int glob[];")
     lib = verify(ffi, "test_unspecified_size_of_global_1", "int glob[10];")
     assert ffi.typeof(lib.glob) == ffi.typeof("int *")
 
 def test_unspecified_size_of_global_2():
     ffi = FFI()
-    ffi.cdef("int glob[][5];")
+    ffi.cdef("extern int glob[][5];")
     lib = verify(ffi, "test_unspecified_size_of_global_2", "int glob[10][5];")
     assert ffi.typeof(lib.glob) == ffi.typeof("int(*)[5]")
 
 def test_unspecified_size_of_global_3():
     ffi = FFI()
-    ffi.cdef("int glob[][...];")
+    ffi.cdef("extern int glob[][...];")
     lib = verify(ffi, "test_unspecified_size_of_global_3", "int glob[10][5];")
     assert ffi.typeof(lib.glob) == ffi.typeof("int(*)[5]")
 
 def test_unspecified_size_of_global_4():
     ffi = FFI()
-    ffi.cdef("int glob[...][...];")
+    ffi.cdef("extern int glob[...][...];")
     lib = verify(ffi, "test_unspecified_size_of_global_4", "int glob[10][5];")
     assert ffi.typeof(lib.glob) == ffi.typeof("int[10][5]")
 
@@ -629,7 +650,7 @@
     ffi.cdef("sshort_t ff3(sshort_t);")
     lib = verify(ffi, "test_include_3",
                  "typedef short sshort_t; //usually from a #include\n"
-                 "sshort_t ff3(sshort_t x) { return x + 42; }")
+                 "sshort_t ff3(sshort_t x) { return (sshort_t)(x + 42); }")
     assert lib.ff3(10) == 52
     assert ffi.typeof(ffi.cast("sshort_t", 42)) is ffi.typeof("short")
     assert ffi1.typeof("sshort_t") is ffi.typeof("sshort_t")
@@ -738,7 +759,7 @@
     ffi = FFI()
     ffi.cdef(unicode("float sin(double); double cos(double);"))
     lib = verify(ffi, 'test_math_sin_unicode', unicode('#include <math.h>'),
-                 libraries=[unicode(lib_m)])
+                 libraries=[unicode(lib_m)], ignore_warnings=True)
     assert lib.cos(1.43) == math.cos(1.43)
 
 def test_incomplete_struct_as_arg():
@@ -798,7 +819,7 @@
 def test_address_of_global_var():
     ffi = FFI()
     ffi.cdef("""
-        long bottom, bottoms[2];
+        extern long bottom, bottoms[2];
         long FetchRectBottom(void);
         long FetchRectBottoms1(void);
         #define FOOBAR 42
@@ -866,15 +887,20 @@
     e5 = py.test.raises(TypeError, lib.foo2)
     e6 = py.test.raises(TypeError, lib.foo2, 42)
     e7 = py.test.raises(TypeError, lib.foo2, 45, 46, 47)
-    assert str(e1.value) == "foo0() takes no arguments (1 given)"
-    assert str(e2.value) == "foo0() takes no arguments (2 given)"
-    assert str(e3.value) == "foo1() takes exactly one argument (0 given)"
-    assert str(e4.value) == "foo1() takes exactly one argument (2 given)"
-    assert str(e5.value) in ["foo2 expected 2 arguments, got 0",
+    def st1(s):
+        s = str(s)
+        if s.startswith("_CFFI_test_unpack_args.Lib."):
+            s = s[len("_CFFI_test_unpack_args.Lib."):]
+        return s
+    assert st1(e1.value) == "foo0() takes no arguments (1 given)"
+    assert st1(e2.value) == "foo0() takes no arguments (2 given)"
+    assert st1(e3.value) == "foo1() takes exactly one argument (0 given)"
+    assert st1(e4.value) == "foo1() takes exactly one argument (2 given)"
+    assert st1(e5.value) in ["foo2 expected 2 arguments, got 0",
                              "foo2() takes exactly 2 arguments (0 given)"]
-    assert str(e6.value) in ["foo2 expected 2 arguments, got 1",
+    assert st1(e6.value) in ["foo2 expected 2 arguments, got 1",
                              "foo2() takes exactly 2 arguments (1 given)"]
-    assert str(e7.value) in ["foo2 expected 2 arguments, got 3",
+    assert st1(e7.value) in ["foo2 expected 2 arguments, got 3",
                              "foo2() takes exactly 2 arguments (3 given)"]
 
 def test_address_of_function():
@@ -882,7 +908,7 @@
     ffi.cdef("long myfunc(long x);")
     lib = verify(ffi, "test_addressof_function", """
         char myfunc(char x) { return (char)(x + 42); }
-    """)
+    """, ignore_warnings=True)
     assert lib.myfunc(5) == 47
     assert lib.myfunc(0xABC05) == 47
     assert not isinstance(lib.myfunc, ffi.CData)
@@ -953,7 +979,7 @@
     ffi = FFI()
     ffi.cdef("""
         typedef ... opaque_t;
-        opaque_t globvar;
+        extern opaque_t globvar;
     """)
     lib = verify(ffi, 'test_variable_of_unknown_size', """
         typedef char opaque_t[6];
@@ -998,7 +1024,7 @@
 def test_call_with_incomplete_structs():
     ffi = FFI()
     ffi.cdef("typedef struct {...;} foo_t; "
-             "foo_t myglob; "
+             "extern foo_t myglob; "
              "foo_t increment(foo_t s); "
              "double getx(foo_t s);")
     lib = verify(ffi, 'test_call_with_incomplete_structs', """
@@ -1020,8 +1046,10 @@
     assert ffi.typeof(s.a) == ffi.typeof("int[5][8]")
     assert ffi.sizeof(s.a) == 40 * ffi.sizeof('int')
     assert s.a[4][7] == 0
-    py.test.raises(IndexError, 's.a[4][8]')
-    py.test.raises(IndexError, 's.a[5][0]')
+    with pytest.raises(IndexError):
+        s.a[4][8]
+    with pytest.raises(IndexError):
+        s.a[5][0]
     assert ffi.typeof(s.a) == ffi.typeof("int[5][8]")
     assert ffi.typeof(s.a[0]) == ffi.typeof("int[8]")
 
@@ -1034,38 +1062,44 @@
     s = ffi.new("struct foo_s *")
     assert ffi.typeof(s.a) == ffi.typeof("int[][7]")
     assert s.a[4][6] == 0
-    py.test.raises(IndexError, 's.a[4][7]')
+    with pytest.raises(IndexError):
+        s.a[4][7]
     assert ffi.typeof(s.a[0]) == ffi.typeof("int[7]")
 
 def test_global_var_array_2():
     ffi = FFI()
-    ffi.cdef("int a[...][...];")
+    ffi.cdef("extern int a[...][...];")
     lib = verify(ffi, 'test_global_var_array_2', 'int a[10][8];')
     lib.a[9][7] = 123456
     assert lib.a[9][7] == 123456
-    py.test.raises(IndexError, 'lib.a[0][8]')
-    py.test.raises(IndexError, 'lib.a[10][0]')
+    with pytest.raises(IndexError):
+        lib.a[0][8]
+    with pytest.raises(IndexError):
+        lib.a[10][0]
     assert ffi.typeof(lib.a) == ffi.typeof("int[10][8]")
     assert ffi.typeof(lib.a[0]) == ffi.typeof("int[8]")
 
 def test_global_var_array_3():
     ffi = FFI()
-    ffi.cdef("int a[][...];")
+    ffi.cdef("extern int a[][...];")
     lib = verify(ffi, 'test_global_var_array_3', 'int a[10][8];')
     lib.a[9][7] = 123456
     assert lib.a[9][7] == 123456
-    py.test.raises(IndexError, 'lib.a[0][8]')
+    with pytest.raises(IndexError):
+        lib.a[0][8]
     assert ffi.typeof(lib.a) == ffi.typeof("int(*)[8]")
     assert ffi.typeof(lib.a[0]) == ffi.typeof("int[8]")
 
 def test_global_var_array_4():
     ffi = FFI()
-    ffi.cdef("int a[10][...];")
+    ffi.cdef("extern int a[10][...];")
     lib = verify(ffi, 'test_global_var_array_4', 'int a[10][8];')
     lib.a[9][7] = 123456
     assert lib.a[9][7] == 123456
-    py.test.raises(IndexError, 'lib.a[0][8]')
-    py.test.raises(IndexError, 'lib.a[10][8]')
+    with pytest.raises(IndexError):
+        lib.a[0][8]
+    with pytest.raises(IndexError):
+        lib.a[10][8]
     assert ffi.typeof(lib.a) == ffi.typeof("int[10][8]")
     assert ffi.typeof(lib.a[0]) == ffi.typeof("int[8]")
 
@@ -1147,7 +1181,7 @@
     lib = verify(ffi, 'test_some_float_invalid_3', """
         typedef long double foo_t;
         foo_t neg(foo_t x) { return -x; }
-    """)
+    """, ignore_warnings=True)
     if ffi.sizeof("long double") == ffi.sizeof("double"):
         assert lib.neg(12.3) == -12.3
     else:
@@ -1181,7 +1215,7 @@
 
 def test_import_from_lib():
     ffi = FFI()
-    ffi.cdef("int mybar(int); int myvar;\n#define MYFOO ...")
+    ffi.cdef("int mybar(int); static int myvar;\n#define MYFOO ...")
     lib = verify(ffi, 'test_import_from_lib',
                  "#define MYFOO 42\n"
                  "static int mybar(int x) { return x + 1; }\n"
@@ -1197,7 +1231,7 @@
 
 def test_macro_var_callback():
     ffi = FFI()
-    ffi.cdef("int my_value; int *(*get_my_value)(void);")
+    ffi.cdef("extern int my_value; extern int *(*get_my_value)(void);")
     lib = verify(ffi, 'test_macro_var_callback',
                  "int *(*get_my_value)(void);\n"
                  "#define my_value (*get_my_value())")
@@ -1312,7 +1346,7 @@
 
 def test_const_function_type_args():
     ffi = FFI()
-    ffi.cdef("""int (*foobar)(const int a, const int *b, const int c[]);""")
+    ffi.cdef("""extern int(*foobar)(const int a,const int*b,const int c[]);""")
     lib = verify(ffi, 'test_const_function_type_args', """
         int (*foobar)(const int a, const int *b, const int c[]);
     """)
@@ -1338,7 +1372,8 @@
         #define aaa 42
     """)
     assert lib.aaa == 42
-    py.test.raises(AttributeError, "lib.aaa = 43")
+    with pytest.raises(AttributeError):
+        lib.aaa = 43
 
 def test_win32_calling_convention_0():
     ffi = FFI()
@@ -1601,7 +1636,7 @@
 
 def test_extern_python_bogus_name():
     ffi = FFI()
-    ffi.cdef("int abc;")
+    ffi.cdef("extern int abc;")
     lib = verify(ffi, 'test_extern_python_bogus_name', "int abc;")
     def fn():
         pass
@@ -1634,9 +1669,10 @@
     with StdErrCapture() as f:
         res = lib.bar(321)
     assert res is None
-    assert f.getvalue() == (
-        "From cffi callback %r:\n" % (bar,) +
-        "Trying to convert the result back to C:\n"
+    msg = f.getvalue()
+    assert "rom cffi callback %r" % (bar,) in msg
+    assert "rying to convert the result back to C:\n" in msg
+    assert msg.endswith(
         "TypeError: callback with the return type 'void' must return None\n")
 
 def test_extern_python_redefine():
@@ -1762,8 +1798,8 @@
     ffi.cdef("""
         extern "Python" int __stdcall foo(int);
         extern "Python" int WINAPI bar(int);
-        int (__stdcall * mycb1)(int);
-        int indirect_call(int);
+        static int (__stdcall * mycb1)(int);
+        static int indirect_call(int);
     """)
     lib = verify(ffi, 'test_extern_python_stdcall', """
         #ifndef _MSC_VER
@@ -1820,7 +1856,7 @@
     ffi = FFI()
     ffi.cdef("float f1(double);")
     lib = verify(ffi, 'test_introspect_function', """
-        float f1(double x) { return x; }
+        float f1(double x) { return (float)x; }
     """)
     assert dir(lib) == ['f1']
     FUNC = ffi.typeof(lib.f1)
@@ -1831,7 +1867,7 @@
 
 def test_introspect_global_var():
     ffi = FFI()
-    ffi.cdef("float g1;")
+    ffi.cdef("extern float g1;")
     lib = verify(ffi, 'test_introspect_global_var', """
         float g1;
     """)
@@ -1842,7 +1878,7 @@
 
 def test_introspect_global_var_array():
     ffi = FFI()
-    ffi.cdef("float g1[100];")
+    ffi.cdef("extern float g1[100];")
     lib = verify(ffi, 'test_introspect_global_var_array', """
         float g1[100];
     """)
@@ -2014,7 +2050,7 @@
     ffi.cdef("float _Complex f1(float a, float b);");
     lib = verify(ffi, "test_function_returns_float_complex", """
         #include <complex.h>
-        static float _Complex f1(float a, float b) { return a + I*2.0*b; }
+        static float _Complex f1(float a, float b) { return a + I*2.0f*b; }
     """, no_cpp=True)    # <complex.h> fails on some systems with C++
     result = lib.f1(1.25, 5.1)
     assert type(result) == complex
@@ -2065,7 +2101,7 @@
     ffi = FFI()
     ffi.cdef("""
         typedef int foo_t[...], bar_t[...];
-        int gv[...];
+        extern int gv[...];
         typedef int mat_t[...][...];
         typedef int vmat_t[][...];
         """)
@@ -2087,6 +2123,40 @@
     p = ffi.new("vmat_t", 4)
     assert ffi.sizeof(p[3]) == 8 * ffi.sizeof("int")
 
+def test_typedef_array_dotdotdot_usage():
+    ffi = FFI()
+    ffi.cdef("""
+        typedef int foo_t[...];
+        typedef int mat_t[...][...];
+        struct s { foo_t a; foo_t *b; foo_t **c; };
+        int myfunc(foo_t a, foo_t *b, foo_t **c);
+        struct sm { mat_t a; mat_t *b; mat_t **c; };
+        int myfuncm(mat_t a, mat_t *b, mat_t **c);
+        """)
+    lib = verify(ffi, "test_typedef_array_dotdotdot_usage", """
+        typedef int foo_t[50];
+        typedef int mat_t[6][7];
+        struct s { foo_t a; foo_t *b; foo_t **c; };
+        static int myfunc(foo_t a, foo_t *b, foo_t **c) { return (**c)[49]; }
+        struct sm { mat_t a; mat_t *b; mat_t **c; };
+        static int myfuncm(mat_t a, mat_t *b, mat_t **c) { return (**c)[5][6]; }
+    """)
+    assert ffi.sizeof("foo_t") == 50 * ffi.sizeof("int")
+    p = ffi.new("struct s *")
+    assert ffi.sizeof(p[0]) == 50 * ffi.sizeof("int") + 2 * ffi.sizeof("void *")
+    p.a[49] = 321
+    p.b = ffi.addressof(p, 'a')
+    p.c = ffi.addressof(p, 'b')
+    assert lib.myfunc(ffi.NULL, ffi.NULL, p.c) == 321
+    #
+    assert ffi.sizeof("mat_t") == 42 * ffi.sizeof("int")
+    p = ffi.new("struct sm *")
+    assert ffi.sizeof(p[0]) == 42 * ffi.sizeof("int") + 2 * ffi.sizeof("void *")
+    p.a[5][6] = -321
+    p.b = ffi.addressof(p, 'a')
+    p.c = ffi.addressof(p, 'b')
+    assert lib.myfuncm(ffi.NULL, ffi.NULL, p.c) == -321
+
 def test_call_with_custom_field_pos():
     ffi = FFI()
     ffi.cdef("""
@@ -2125,7 +2195,8 @@
     lib = verify(ffi, "test_call_with_nested_anonymous_struct", """
         struct foo { int a; union { int b, c; }; };
         struct foo f(void) {
-            struct foo s = { 40 };
+            struct foo s;
+            s.a = 40;
             s.b = 200;
             return s;
         }
@@ -2314,3 +2385,111 @@
         typedef int foo_t; struct foo_s { void (*x)(foo_t); };
     """)
     py.test.raises(TypeError, ffi.new, "struct foo_s *")
+
+def test_from_buffer_struct():
+    ffi = FFI()
+    ffi.cdef("""struct foo_s { int a, b; };""")
+    lib = verify(ffi, "test_from_buffer_struct_p", """
+        struct foo_s { int a, b; };
+    """)
+    p = ffi.new("struct foo_s *", [-219239, 58974983])
+    q = ffi.from_buffer("struct foo_s[]", ffi.buffer(p))
+    assert ffi.typeof(q) == ffi.typeof("struct foo_s[]")
+    assert len(q) == 1
+    assert q[0].a == p.a
+    assert q[0].b == p.b
+    assert q == p
+    q = ffi.from_buffer("struct foo_s *", ffi.buffer(p))
+    assert ffi.typeof(q) == ffi.typeof("struct foo_s *")
+    assert q.a == p.a
+    assert q.b == p.b
+    assert q[0].a == p.a
+    assert q[0].b == p.b
+    assert q == p
+
+def test_unnamed_bitfield_1():
+    ffi = FFI()
+    ffi.cdef("""struct A { char : 1; };""")
+    lib = verify(ffi, "test_unnamed_bitfield_1", """
+        struct A { char : 1; };
+    """)
+    p = ffi.new("struct A *")
+    assert ffi.sizeof(p[0]) == 1
+    # Note: on gcc, the type name is ignored for anonymous bitfields
+    # and that's why the result is 1.  On MSVC, the result is
+    # sizeof("char") which is also 1.
+
+def test_unnamed_bitfield_2():
+    ffi = FFI()
+    ffi.cdef("""struct A {
+        short c : 1; short : 1; short d : 1; short : 1; };""")
+    lib = verify(ffi, "test_unnamed_bitfield_2", """
+        struct A {
+            short c : 1; short : 1; short d : 1; short : 1;
+        };
+    """)
+    p = ffi.new("struct A *")
+    assert ffi.sizeof(p[0]) == ffi.sizeof("short")
+
+def test_unnamed_bitfield_3():
+    ffi = FFI()
+    ffi.cdef("""struct A { struct { char : 1; char : 1; } b; };""")
+    lib = verify(ffi, "test_unnamed_bitfield_3", """
+        struct A { struct { char : 1; char : 1; } b; };
+    """)
+    p = ffi.new("struct A *")
+    assert ffi.sizeof(p[0]) == 1
+    # Note: on gcc, the type name is ignored for anonymous bitfields
+    # and that's why the result is 1.  On MSVC, the result is
+    # sizeof("char") which is also 1.
+
+def test_unnamed_bitfield_4():
+    ffi = FFI()
+    ffi.cdef("""struct A { struct {
+        unsigned c : 1; unsigned : 1; unsigned d : 1; unsigned : 1; } a;
+        };
+        struct B { struct A a; };""")
+    lib = verify(ffi, "test_unnamed_bitfield_4", """
+        struct A { struct {
+            unsigned c : 1; unsigned : 1; unsigned d : 1; unsigned : 1; } a;
+        };
+        struct B { struct A a; };
+    """)
+    b = ffi.new("struct B *")
+    a = ffi.new("struct A *")
+    assert ffi.sizeof(a[0]) == ffi.sizeof("unsigned")
+    assert ffi.sizeof(b[0]) == ffi.sizeof(a[0])
+
+def test_struct_with_func_with_struct_pointer_arg():
+    ffi = FFI()
+    ffi.cdef("""struct BinaryTree {
+            int (* CompareKey)(struct BinaryTree *tree);
+        };""")
+    lib = verify(ffi, "test_struct_with_func_with_struct_pointer_arg", """
+        struct BinaryTree {
+            int (* CompareKey)(struct BinaryTree *tree);
+        };
+    """)
+    ffi.new("struct BinaryTree *")
+
+def test_struct_with_func_with_struct_arg():
+    ffi = FFI()
+    ffi.cdef("""struct BinaryTree {
+            int (* CompareKey)(struct BinaryTree tree);
+        };""")
+    lib = verify(ffi, "test_struct_with_func_with_struct_arg", """
+        struct BinaryTree {
+            int (* CompareKey)(struct BinaryTree tree);
+        };
+    """)
+    py.test.raises(RuntimeError, ffi.new, "struct BinaryTree *")
+
+def test_passing_large_list():
+    ffi = FFI()
+    ffi.cdef("""void passing_large_list(long[]);""")
+    lib = verify(ffi, "test_passing_large_list", """
+        static void passing_large_list(long a[]) { }
+    """)
+    arg = list(range(20000000))
+    lib.passing_large_list(arg)
+    # assert did not segfault
diff --git a/testing/cffi1/test_verify1.py b/testing/cffi1/test_verify1.py
index 75f113d..33244cc 100644
--- a/testing/cffi1/test_verify1.py
+++ b/testing/cffi1/test_verify1.py
@@ -1,9 +1,10 @@
 import os, sys, math, py
+import pytest
 from cffi import FFI, FFIError, VerificationError, VerificationMissing, model
 from cffi import CDefError
 from cffi import recompiler
 from testing.support import *
-from testing.support import _verify
+from testing.support import _verify, extra_compile_args
 import _cffi_backend
 
 lib_m = ['m']
@@ -12,17 +13,6 @@
     import distutils.ccompiler
     if distutils.ccompiler.get_default_compiler() == 'msvc':
         lib_m = ['msvcrt']
-    extra_compile_args = []      # no obvious -Werror equivalent on MSVC
-else:
-    if (sys.platform == 'darwin' and
-          [int(x) for x in os.uname()[2].split('.')] >= [11, 0, 0]):
-        # assume a standard clang or gcc
-        extra_compile_args = ['-Werror', '-Wall', '-Wextra', '-Wconversion']
-        # special things for clang
-        extra_compile_args.append('-Qunused-arguments')
-    else:
-        # assume a standard gcc
-        extra_compile_args = ['-Werror', '-Wall', '-Wextra', '-Wconversion']
 
 class FFI(FFI):
     error = _cffi_backend.FFI.error
@@ -265,7 +255,7 @@
 def test_var_signed_integer_types():
     ffi = FFI()
     lst = all_signed_integer_types(ffi)
-    csource = "\n".join(["%s somevar_%s;" % (tp, tp.replace(' ', '_'))
+    csource = "\n".join(["static %s somevar_%s;" % (tp, tp.replace(' ', '_'))
                          for tp in lst])
     ffi.cdef(csource)
     lib = ffi.verify(csource)
@@ -284,7 +274,7 @@
 def test_var_unsigned_integer_types():
     ffi = FFI()
     lst = all_unsigned_integer_types(ffi)
-    csource = "\n".join(["%s somevar_%s;" % (tp, tp.replace(' ', '_'))
+    csource = "\n".join(["static %s somevar_%s;" % (tp, tp.replace(' ', '_'))
                          for tp in lst])
     ffi.cdef(csource)
     lib = ffi.verify(csource)
@@ -571,7 +561,8 @@
     assert ffi.sizeof('struct foo_s') == 19 * ffi.sizeof('int')
     s = ffi.new("struct foo_s *")
     assert ffi.sizeof(s.a) == 17 * ffi.sizeof('int')
-    py.test.raises(IndexError, 's.a[17]')
+    with pytest.raises(IndexError):
+        s.a[17]
 
 def test_struct_array_c99_1():
     if sys.platform == 'win32':
@@ -629,7 +620,8 @@
     ffi.verify("struct foo_s { int a:2, b:3; };")
     s = ffi.new("struct foo_s *")
     s.b = 3
-    py.test.raises(OverflowError, "s.b = 4")
+    with pytest.raises(OverflowError):
+        s.b = 4
     assert s.b == 3
 
 def test_struct_with_bitfield_enum():
@@ -786,8 +778,8 @@
 
 def test_access_variable():
     ffi = FFI()
-    ffi.cdef("int foo(void);\n"
-             "int somenumber;")
+    ffi.cdef("static int foo(void);\n"
+             "static int somenumber;")
     lib = ffi.verify("""
         static int somenumber = 2;
         static int foo(void) {
@@ -804,7 +796,7 @@
 def test_access_address_of_variable():
     # access the address of 'somenumber': need a trick
     ffi = FFI()
-    ffi.cdef("int somenumber; static int *const somenumberptr;")
+    ffi.cdef("static int somenumber; static int *const somenumberptr;")
     lib = ffi.verify("""
         static int somenumber = 2;
         #define somenumberptr (&somenumber)
@@ -816,8 +808,8 @@
 
 def test_access_array_variable(length=5):
     ffi = FFI()
-    ffi.cdef("int foo(int);\n"
-             "int somenumber[%s];" % (length,))
+    ffi.cdef("static int foo(int);\n"
+             "static int somenumber[%s];" % (length,))
     lib = ffi.verify("""
         static int somenumber[] = {2, 2, 3, 4, 5};
         static int foo(int i) {
@@ -848,8 +840,8 @@
 def test_access_struct_variable():
     ffi = FFI()
     ffi.cdef("struct foo { int x; ...; };\n"
-             "int foo(int);\n"
-             "struct foo stuff;")
+             "static int foo(int);\n"
+             "static struct foo stuff;")
     lib = ffi.verify("""
         struct foo { int x, y, z; };
         static struct foo stuff = {2, 5, 8};
@@ -873,9 +865,9 @@
 
 def test_access_callback():
     ffi = FFI()
-    ffi.cdef("int (*cb)(int);\n"
-             "int foo(int);\n"
-             "void reset_cb(void);")
+    ffi.cdef("static int (*cb)(int);\n"
+             "static int foo(int);\n"
+             "static void reset_cb(void);")
     lib = ffi.verify("""
         static int g(int x) { return x * 7; }
         static int (*cb)(int);
@@ -891,9 +883,9 @@
 def test_access_callback_function_typedef():
     ffi = FFI()
     ffi.cdef("typedef int mycallback_t(int);\n"
-             "mycallback_t *cb;\n"
-             "int foo(int);\n"
-             "void reset_cb(void);")
+             "static mycallback_t *cb;\n"
+             "static int foo(int);\n"
+             "static void reset_cb(void);")
     lib = ffi.verify("""
         static int g(int x) { return x * 7; }
         static int (*cb)(int);
@@ -1034,7 +1026,7 @@
 def test_autofilled_struct_as_argument_dynamic():
     ffi = FFI()
     ffi.cdef("struct foo_s { long a; ...; };\n"
-             "int (*foo)(struct foo_s);")
+             "static int (*foo)(struct foo_s);")
     lib = ffi.verify("""
         struct foo_s {
             double b;
@@ -1043,7 +1035,7 @@
         int foo1(struct foo_s s) {
             return (int)s.a - (int)s.b;
         }
-        int (*foo)(struct foo_s s) = &foo1;
+        static int (*foo)(struct foo_s s) = &foo1;
     """)
     e = py.test.raises(NotImplementedError, lib.foo, "?")
     msg = ("ctype 'struct foo_s' not supported as argument.  It is a struct "
@@ -1419,7 +1411,7 @@
         py.test.skip("_Bool not in MSVC")
     ffi = FFI()
     ffi.cdef("struct foo_s { _Bool x; };"
-             "_Bool foo(_Bool); _Bool (*foop)(_Bool);")
+             "_Bool foo(_Bool); static _Bool (*foop)(_Bool);")
     lib = ffi.verify("""
         struct foo_s { _Bool x; };
         int foo(int arg) {
@@ -1428,13 +1420,15 @@
         _Bool _foofunc(_Bool x) {
             return !x;
         }
-        _Bool (*foop)(_Bool) = _foofunc;
+        static _Bool (*foop)(_Bool) = _foofunc;
     """)
     p = ffi.new("struct foo_s *")
     p.x = 1
     assert p.x is True
-    py.test.raises(OverflowError, "p.x = -1")
-    py.test.raises(TypeError, "p.x = 0.0")
+    with pytest.raises(OverflowError):
+        p.x = -1
+    with pytest.raises(TypeError):
+        p.x = 0.0
     assert lib.foop(1) is False
     assert lib.foop(True) is False
     assert lib.foop(0) is True
@@ -1502,7 +1496,8 @@
                 }
             """ % (type, type))
             p = ffi.new("struct foo_s *")
-            py.test.raises(TypeError, "p.x = 0.0")
+            with pytest.raises(TypeError):
+                p.x = 0.0
             assert lib.foo(42) == 0
             assert lib.foo(0) == 1
             py.test.raises(TypeError, lib.foo, 0.0)
@@ -1610,7 +1605,7 @@
 
 def test_FILE_stored_explicitly():
     ffi = FFI()
-    ffi.cdef("int myprintf11(const char *, int); FILE *myfile;")
+    ffi.cdef("int myprintf11(const char *, int); extern FILE *myfile;")
     lib = ffi.verify("""
         #include <stdio.h>
         FILE *myfile;
@@ -1636,13 +1631,13 @@
 
 def test_global_array_with_missing_length():
     ffi = FFI()
-    ffi.cdef("int fooarray[];")
+    ffi.cdef("extern int fooarray[];")
     lib = ffi.verify("int fooarray[50];")
     assert repr(lib.fooarray).startswith("<cdata 'int *'")
 
 def test_global_array_with_dotdotdot_length():
     ffi = FFI()
-    ffi.cdef("int fooarray[...];")
+    ffi.cdef("extern int fooarray[...];")
     lib = ffi.verify("int fooarray[50];")
     assert repr(lib.fooarray).startswith("<cdata 'int[50]'")
 
@@ -1650,7 +1645,7 @@
     py.test.xfail("was detected only because 23 bytes cannot be divided by 4; "
                   "redo more generally")
     ffi = FFI()
-    ffi.cdef("int fooarray[...];")
+    ffi.cdef("extern int fooarray[...];")
     py.test.raises(VerificationError, ffi.verify, "char fooarray[23];")
 
 def test_struct_containing_struct():
@@ -1771,7 +1766,7 @@
 def test_callback_indirection():
     ffi = FFI()
     ffi.cdef("""
-        int (*python_callback)(int how_many, int *values);
+        static int (*python_callback)(int how_many, int *values);
         int (*const c_callback)(int,...);   /* pass this ptr to C routines */
         int some_c_function(int(*cb)(int,...));
     """)
@@ -1905,23 +1900,23 @@
 
 def test_bug_const_char_ptr_array_1():
     ffi = FFI()
-    ffi.cdef("""const char *a[...];""")
+    ffi.cdef("""extern const char *a[...];""")
     lib = ffi.verify("""const char *a[5];""")
     assert repr(ffi.typeof(lib.a)) == "<ctype 'char *[5]'>"
 
 def test_bug_const_char_ptr_array_2():
     ffi = FFI()
-    ffi.cdef("""const int a[];""")
+    ffi.cdef("""extern const int a[];""")
     lib = ffi.verify("""const int a[5];""")
     assert repr(ffi.typeof(lib.a)) == "<ctype 'int *'>"
 
 def _test_various_calls(force_libffi):
     cdef_source = """
-    int xvalue;
-    long long ivalue, rvalue;
-    float fvalue;
-    double dvalue;
-    long double Dvalue;
+    extern int xvalue;
+    extern long long ivalue, rvalue;
+    extern float fvalue;
+    extern double dvalue;
+    extern long double Dvalue;
     signed char tf_bb(signed char x, signed char c);
     unsigned char tf_bB(signed char x, unsigned char c);
     short tf_bh(signed char x, short c);
@@ -2104,7 +2099,7 @@
     old = sys.getdlopenflags()
     try:
         ffi1 = FFI()
-        ffi1.cdef("int foo_verify_dlopen_flags_1;")
+        ffi1.cdef("extern int foo_verify_dlopen_flags_1;")
         sys.setdlopenflags(ffi1.RTLD_GLOBAL | ffi1.RTLD_NOW)
         lib1 = ffi1.verify("int foo_verify_dlopen_flags_1;")
     finally:
@@ -2193,7 +2188,8 @@
     ffi = FFI()
     ffi.cdef("#define FOO 123")
     lib = ffi.verify("#define FOO 124")     # used to complain
-    e = py.test.raises(ffi.error, "lib.FOO")
+    with pytest.raises(ffi.error) as e:
+        lib.FOO
     assert str(e.value) == ("the C compiler says 'FOO' is equal to 124 (0x7c),"
                             " but the cdef disagrees")
 
@@ -2244,7 +2240,7 @@
 
 def test_macro_var():
     ffi = FFI()
-    ffi.cdef("int myarray[50], my_value;")
+    ffi.cdef("extern int myarray[50], my_value;")
     lib = ffi.verify("""
         int myarray[50];
         int *get_my_value(void) {
diff --git a/testing/embedding/add1.py b/testing/embedding/add1.py
index e5b3de1..6f89ae9 100644
--- a/testing/embedding/add1.py
+++ b/testing/embedding/add1.py
@@ -11,7 +11,11 @@
     sys.stdout.write("preparing")
     for i in range(3):
         sys.stdout.flush()
-        time.sleep(0.2)
+        # Windows: sometimes time.sleep() doesn't sleep at all.
+        # This appears to occur on recent versions of python only.
+        t_end = time.time() + 0.19
+        while time.time() < t_end:
+            time.sleep(0.2)
         sys.stdout.write(".")
     sys.stdout.write("\n")
 
diff --git a/testing/embedding/add_recursive.py b/testing/embedding/add_recursive.py
index 9fa463d..a88aa8f 100644
--- a/testing/embedding/add_recursive.py
+++ b/testing/embedding/add_recursive.py
@@ -3,7 +3,7 @@
 ffi = cffi.FFI()
 
 ffi.embedding_api("""
-    int (*my_callback)(int);
+    extern int (*my_callback)(int);
     int add_rec(int, int);
 """)
 
diff --git a/testing/embedding/test_basic.py b/testing/embedding/test_basic.py
index 8463c3f..8d2e776 100644
--- a/testing/embedding/test_basic.py
+++ b/testing/embedding/test_basic.py
@@ -63,8 +63,8 @@
         output = popen.stdout.read()
         err = popen.wait()
         if err:
-            raise OSError("popen failed with exit code %r: %r" % (
-                err, args))
+            raise OSError(("popen failed with exit code %r: %r\n\n%s" % (
+                err, args, output)).rstrip())
         print(output.rstrip())
         return output
 
@@ -172,7 +172,8 @@
         result = popen.stdout.read()
         err = popen.wait()
         if err:
-            raise OSError("%r failed with exit code %r" % (name, err))
+            raise OSError("%r failed with exit code %r" % (
+                os.path.join(path, executable_name), err))
         return result
 
 
@@ -205,3 +206,9 @@
         self.compile('add1-test', [initerror_cffi])
         output = self.execute('add1-test')
         assert output == "got: 0 0\n"    # plus lots of info to stderr
+
+    def test_embedding_with_unicode(self):
+        withunicode_cffi = self.prepare_module('withunicode')
+        self.compile('add1-test', [withunicode_cffi])
+        output = self.execute('add1-test')
+        assert output == "255\n4660\n65244\ngot: 0 0\n"
diff --git a/testing/embedding/test_performance.py b/testing/embedding/test_performance.py
index f9f2605..a0e8458 100644
--- a/testing/embedding/test_performance.py
+++ b/testing/embedding/test_performance.py
@@ -2,8 +2,8 @@
 from testing.embedding.test_basic import EmbeddingTests
 
 if sys.platform == 'win32':
-    import py
-    py.test.skip("written with POSIX functions")
+    import pytest
+    pytestmark = pytest.mark.skip("written with POSIX functions")
 
 
 class TestPerformance(EmbeddingTests):
diff --git a/testing/embedding/test_thread.py b/testing/embedding/test_thread.py
index 1895076..9a5936d 100644
--- a/testing/embedding/test_thread.py
+++ b/testing/embedding/test_thread.py
@@ -21,17 +21,21 @@
         add1_cffi = self.prepare_module('add1')
         add2_cffi = self.prepare_module('add2')
         self.compile('thread2-test', [add1_cffi, add2_cffi], threads=True)
-        output = self.execute('thread2-test')
-        output = self._take_out(output, "preparing")
-        output = self._take_out(output, ".")
-        output = self._take_out(output, ".")
-        # at least the 3rd dot should be after everything from ADD2
-        assert output == ("starting\n"
-                          "prepADD2\n"
-                          "adding 1000 and 200 and 30\n"
-                          ".\n"
-                          "adding 40 and 2\n"
-                          "done\n")
+        for i in range(3):
+            output = self.execute('thread2-test')
+            print('='*79)
+            print(output)
+            print('='*79)
+            output = self._take_out(output, "preparing")
+            output = self._take_out(output, ".")
+            output = self._take_out(output, ".")
+            # at least the 3rd dot should be after everything from ADD2
+            assert output == ("starting\n"
+                              "prepADD2\n"
+                              "adding 1000 and 200 and 30\n"
+                              ".\n"
+                              "adding 40 and 2\n"
+                              "done\n")
 
     def test_alt_issue(self):
         add1_cffi = self.prepare_module('add1')
diff --git a/testing/embedding/withunicode.py b/testing/embedding/withunicode.py
new file mode 100644
index 0000000..839c6cd
--- /dev/null
+++ b/testing/embedding/withunicode.py
@@ -0,0 +1,26 @@
+import sys, cffi
+if sys.version_info < (3,):
+    u_prefix = "u"
+else:
+    u_prefix = ""
+    unichr = chr
+
+
+ffi = cffi.FFI()
+
+ffi.embedding_api(u"""
+    int add1(int, int);
+""")
+
+ffi.embedding_init_code(("""
+    import sys, time
+    for c in %s'""" + unichr(0x00ff) + unichr(0x1234) + unichr(0xfedc) + """':
+        sys.stdout.write(str(ord(c)) + '\\n')
+    sys.stdout.flush()
+""") % u_prefix)
+
+ffi.set_source("_withunicode_cffi", """
+""")
+
+fn = ffi.compile(verbose=True)
+print('FILENAME: %s' % (fn,))
diff --git a/testing/support.py b/testing/support.py
index 65f010c..6339a94 100644
--- a/testing/support.py
+++ b/testing/support.py
@@ -1,7 +1,7 @@
-import sys
+import sys, os
 
 if sys.version_info < (3,):
-    __all__ = ['u']
+    __all__ = ['u', 'arraytostring']
 
     class U(object):
         def __add__(self, other):
@@ -12,12 +12,16 @@
     assert u+'a\x00b' == eval(r"u'a\x00b'")
     assert u+'a\u1234b' == eval(r"u'a\u1234b'")
     assert u+'a\U00012345b' == eval(r"u'a\U00012345b'")
+    def arraytostring(a):
+        return a.tostring()
 
 else:
-    __all__ = ['u', 'unicode', 'long']
+    __all__ = ['u', 'unicode', 'long', 'arraytostring']
     u = ""
     unicode = str
     long = int
+    def arraytostring(a):
+        return a.tobytes()
 
 
 class StdErrCapture(object):
@@ -29,9 +33,14 @@
             from io import StringIO
         self.old_stderr = sys.stderr
         sys.stderr = f = StringIO()
+        if hasattr(sys, '__unraisablehook__'):           # work around pytest
+            self.old_unraisablebook = sys.unraisablehook # on recent CPythons
+            sys.unraisablehook = sys.__unraisablehook__
         return f
     def __exit__(self, *args):
         sys.stderr = self.old_stderr
+        if hasattr(self, 'old_unraisablebook'):
+            sys.unraisablehook = self.old_unraisablebook
 
 
 class FdWriteCapture(object):
@@ -86,3 +95,25 @@
         if not name.startswith('_') and not hasattr(module.ffi, name):
             setattr(ffi, name, NotImplemented)
     return module.lib
+
+
+# For testing, we call gcc with "-Werror".  This is fragile because newer
+# versions of gcc are always better at producing warnings, particularly for
+# auto-generated code.  We need here to adapt and silence them as needed.
+
+if sys.platform == 'win32':
+    extra_compile_args = []      # no obvious -Werror equivalent on MSVC
+else:
+    if (sys.platform == 'darwin' and
+          [int(x) for x in os.uname()[2].split('.')] >= [11, 0, 0]):
+        # assume a standard clang or gcc
+        extra_compile_args = ['-Werror', '-Wall', '-Wextra', '-Wconversion',
+                              '-Wno-unused-parameter',
+                              '-Wno-unreachable-code']
+        # special things for clang
+        extra_compile_args.append('-Qunused-arguments')
+    else:
+        # assume a standard gcc
+        extra_compile_args = ['-Werror', '-Wall', '-Wextra', '-Wconversion',
+                              '-Wno-unused-parameter',
+                              '-Wno-unreachable-code']
diff --git a/testing/udir.py b/testing/udir.py
index 4dd0a11..59db1c4 100644
--- a/testing/udir.py
+++ b/testing/udir.py
@@ -1,7 +1,134 @@
 import py
-import sys
+import sys, os, atexit
 
-udir = py.path.local.make_numbered_dir(prefix = 'ffi-')
+
+# This is copied from PyPy's vendored py lib.  The latest py lib release
+# (1.8.1) contains a bug and crashes if it sees another temporary directory
+# in which we don't have write permission (e.g. because it's owned by someone
+# else).
+def make_numbered_dir(prefix='session-', rootdir=None, keep=3,
+                      lock_timeout = 172800,   # two days
+                      min_timeout = 300):      # five minutes
+    """ return unique directory with a number greater than the current
+        maximum one.  The number is assumed to start directly after prefix.
+        if keep is true directories with a number less than (maxnum-keep)
+        will be removed.
+    """
+    if rootdir is None:
+        rootdir = py.path.local.get_temproot()
+
+    def parse_num(path):
+        """ parse the number out of a path (if it matches the prefix) """
+        bn = path.basename
+        if bn.startswith(prefix):
+            try:
+                return int(bn[len(prefix):])
+            except ValueError:
+                pass
+
+    # compute the maximum number currently in use with the
+    # prefix
+    lastmax = None
+    while True:
+        maxnum = -1
+        for path in rootdir.listdir():
+            num = parse_num(path)
+            if num is not None:
+                maxnum = max(maxnum, num)
+
+        # make the new directory
+        try:
+            udir = rootdir.mkdir(prefix + str(maxnum+1))
+        except py.error.EEXIST:
+            # race condition: another thread/process created the dir
+            # in the meantime.  Try counting again
+            if lastmax == maxnum:
+                raise
+            lastmax = maxnum
+            continue
+        break
+
+    # put a .lock file in the new directory that will be removed at
+    # process exit
+    if lock_timeout:
+        lockfile = udir.join('.lock')
+        mypid = os.getpid()
+        if hasattr(lockfile, 'mksymlinkto'):
+            lockfile.mksymlinkto(str(mypid))
+        else:
+            lockfile.write(str(mypid))
+        def try_remove_lockfile():
+            # in a fork() situation, only the last process should
+            # remove the .lock, otherwise the other processes run the
+            # risk of seeing their temporary dir disappear.  For now
+            # we remove the .lock in the parent only (i.e. we assume
+            # that the children finish before the parent).
+            if os.getpid() != mypid:
+                return
+            try:
+                lockfile.remove()
+            except py.error.Error:
+                pass
+        atexit.register(try_remove_lockfile)
+
+    # prune old directories
+    if keep:
+        for path in rootdir.listdir():
+            num = parse_num(path)
+            if num is not None and num <= (maxnum - keep):
+                if min_timeout:
+                    # NB: doing this is needed to prevent (or reduce
+                    # a lot the chance of) the following situation:
+                    # 'keep+1' processes call make_numbered_dir() at
+                    # the same time, they create dirs, but then the
+                    # last process notices the first dir doesn't have
+                    # (yet) a .lock in it and kills it.
+                    try:
+                        t1 = path.lstat().mtime
+                        t2 = lockfile.lstat().mtime
+                        if abs(t2-t1) < min_timeout:
+                            continue   # skip directories too recent
+                    except py.error.Error:
+                        continue   # failure to get a time, better skip
+                lf = path.join('.lock')
+                try:
+                    t1 = lf.lstat().mtime
+                    t2 = lockfile.lstat().mtime
+                    if not lock_timeout or abs(t2-t1) < lock_timeout:
+                        continue   # skip directories still locked
+                except py.error.Error:
+                    pass   # assume that it means that there is no 'lf'
+                try:
+                    path.remove(rec=1)
+                except KeyboardInterrupt:
+                    raise
+                except: # this might be py.error.Error, WindowsError ...
+                    pass
+
+    # make link...
+    try:
+        username = os.environ['USER']           #linux, et al
+    except KeyError:
+        try:
+            username = os.environ['USERNAME']   #windows
+        except KeyError:
+            username = 'current'
+
+    src  = str(udir)
+    dest = src[:src.rfind('-')] + '-' + username
+    try:
+        os.unlink(dest)
+    except OSError:
+        pass
+    try:
+        os.symlink(src, dest)
+    except (OSError, AttributeError, NotImplementedError):
+        pass
+
+    return udir
+
+
+udir = make_numbered_dir(prefix = 'ffi-')
 
 
 # Windows-only workaround for some configurations: see
