Snap for 8554636 from aa347ab80ee55e0594c1988abf6fbcfb5cd80a11 to sdk-release

Change-Id: I6a8a1528ec76107eb44fca2001fff520ac86aa0b
diff --git a/Android.bp b/Android.bp
index ea7a7bd..3629a11 100644
--- a/Android.bp
+++ b/Android.bp
@@ -493,31 +493,63 @@
 // Tests
 cc_test {
     name: "boringssl_crypto_test",
-    test_suites: ["device-tests"],
-    host_supported: true,
+    test_config: "NativeTests.xml",
+    host_supported: false,
+    per_testcase_directory: true,
+    compile_multilib: "both",
+    multilib: {
+        lib32: {
+            suffix: "32",
+        },
+        lib64: {
+            suffix: "64",
+        },
+    },
     defaults: [
         "boringssl_crypto_test_sources",
         "boringssl_flags",
     ],
     whole_static_libs: ["boringssl_test_support"],
-
-    shared_libs: ["libcrypto"],
+    // Statically link the library to test to ensure we always pick up the
+    // correct version regardless of device linker configuration.
+    static_libs: ["libcrypto_static"],
+    target: {
+        android: {
+            test_suites: ["mts-conscrypt"],
+        },
+    },
 }
 
 cc_test {
     name: "boringssl_ssl_test",
-    test_suites: ["device-tests"],
-    host_supported: true,
+    test_config: "NativeTests.xml",
+    host_supported: false,
+    per_testcase_directory: true,
+    compile_multilib: "both",
+    multilib: {
+        lib32: {
+            suffix: "32",
+        },
+        lib64: {
+            suffix: "64",
+        },
+    },
     defaults: [
         "boringssl_ssl_test_sources",
         "boringssl_flags",
     ],
     whole_static_libs: ["boringssl_test_support"],
-
-    shared_libs: [
-        "libcrypto",
+    // Statically link the libraries to test to ensure we always pick up the
+    // correct version regardless of device linker configuration.
+    static_libs: [
+        "libcrypto_static",
         "libssl",
     ],
+    target: {
+        android: {
+            test_suites: ["mts-conscrypt"],
+        },
+    },
 }
 
 // Utility binary for CMVP on-site testing.
@@ -534,3 +566,63 @@
         "src/util/fipstools/test_fips.c",
     ],
 }
+
+// Rust bindings
+rust_bindgen {
+    name: "libbssl_sys_raw",
+    source_stem: "bindings",
+    crate_name: "bssl_sys_raw",
+    host_supported: true,
+    wrapper_src: "src/rust/wrapper.h",
+    bindgen_flags: [
+        "--no-derive-default",
+        "--enable-function-attribute-detection",
+        "--use-core",
+        "--size_t-is-usize",
+        "--default-macro-constant-type=signed",
+        "--rustified-enum=point_conversion_form_t",
+        // These are not BoringSSL symbols, they are from glibc
+        // and are not relevant to the build besides throwing warnings
+        // about their 'long double' (aka u128) not being FFI safe.
+        // We block those functions so that the build doesn't
+        // spam warnings.
+        //
+        // https://github.com/rust-lang/rust-bindgen/issues/1549 describes the current problem
+        // and other folks' solutions.
+        "--blocklist-function=strtold",
+        "--blocklist-function=qecvt",
+        "--blocklist-function=qecvt_r",
+        "--blocklist-function=qgcvt",
+        "--blocklist-function=qfcvt",
+        "--blocklist-function=qfcvt_r",
+    ],
+    shared_libs: [
+        "libcrypto",
+        "libssl",
+    ],
+}
+
+cc_library_static {
+    name: "libbssl_rust_support",
+    host_supported: true,
+    defaults: ["boringssl_flags"],
+    srcs: ["src/rust/rust_wrapper.c"],
+    shared_libs: [
+        "libcrypto",
+        "libssl",
+    ],
+}
+
+rust_library {
+    name: "libbssl_ffi",
+    host_supported: true,
+    crate_name: "bssl_ffi",
+    visibility: ["//external/rust/crates/openssl"],
+    srcs: ["src/rust/src/lib.rs"],
+    // Since libbssl_sys_raw is not publicly visible, we can't
+    // accidentally force a double-link by linking statically, so do so.
+    rlibs: ["libbssl_sys_raw"],
+    static_libs: [
+        "libbssl_rust_support",
+    ],
+}
diff --git a/BORINGSSL_REVISION b/BORINGSSL_REVISION
index 95a1efc..26d2c0d 100644
--- a/BORINGSSL_REVISION
+++ b/BORINGSSL_REVISION
@@ -1 +1 @@
-81502beeddc5f116d44d0898c6c4a33057198db8
+c9a7dd687987666df5910f2b35fdc8c3d1e5ed05
diff --git a/BUILD.generated.bzl b/BUILD.generated.bzl
index bf9efa7..5e19592 100644
--- a/BUILD.generated.bzl
+++ b/BUILD.generated.bzl
@@ -266,7 +266,6 @@
     "src/crypto/asn1/a_bool.c",
     "src/crypto/asn1/a_d2i_fp.c",
     "src/crypto/asn1/a_dup.c",
-    "src/crypto/asn1/a_enum.c",
     "src/crypto/asn1/a_gentm.c",
     "src/crypto/asn1/a_i2d_fp.c",
     "src/crypto/asn1/a_int.c",
diff --git a/NativeTests.xml b/NativeTests.xml
new file mode 100644
index 0000000..d3eb944
--- /dev/null
+++ b/NativeTests.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  ~ Copyright (C) 2022 The Android Open Source Project
+  ~
+  ~ Licensed under the Apache License, Version 2.0 (the "License");
+  ~ you may not use this file except in compliance with the License.
+  ~ You may obtain a copy of the License at
+  ~
+  ~      http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  ~
+  ~ Runs the BoringSSL native GTest binaries (boringssl_crypto_test and
+  ~ boringssl_ssl_test) under the mts-conscrypt test suite tag.
+  ~
+  ~ The test binaries are pushed to /data/local/tmp (with append-bitness
+  ~ enabled) and are executed from that directory by the Tradefed GTest
+  ~ runner configured below.
+  -->
+<configuration description="Configuration for BoringSSL native tests">
+   <option name="test-suite-tag" value="mts-conscrypt" />
+   <target_preparer class="com.android.compatibility.common.tradefed.targetprep.FilePusher">
+       <option name="cleanup" value="true" />
+       <option name="push" value="boringssl_crypto_test->/data/local/tmp/boringssl_crypto_test" />
+       <option name="push" value="boringssl_ssl_test->/data/local/tmp/boringssl_ssl_test" />
+       <option name="append-bitness" value="true" />
+   </target_preparer>
+   <target_preparer class="com.android.tradefed.targetprep.RootTargetPreparer"/>
+   <test class="com.android.tradefed.testtype.GTest" >
+       <option name="native-test-device-path" value="/data/local/tmp" />
+       <option name="module-name" value="boringssl_crypto_test" />
+       <option name="module-name" value="boringssl_ssl_test" />
+       <option name="runtime-hint" value="10m" />
+       <option name="native-test-timeout" value="600000" />
+   </test>
+</configuration>
diff --git a/android-sources.cmake b/android-sources.cmake
index 15079b3..294c054 100644
--- a/android-sources.cmake
+++ b/android-sources.cmake
@@ -22,7 +22,6 @@
   ${BORINGSSL_ROOT}src/crypto/asn1/a_bool.c
   ${BORINGSSL_ROOT}src/crypto/asn1/a_d2i_fp.c
   ${BORINGSSL_ROOT}src/crypto/asn1/a_dup.c
-  ${BORINGSSL_ROOT}src/crypto/asn1/a_enum.c
   ${BORINGSSL_ROOT}src/crypto/asn1/a_gentm.c
   ${BORINGSSL_ROOT}src/crypto/asn1/a_i2d_fp.c
   ${BORINGSSL_ROOT}src/crypto/asn1/a_int.c
diff --git a/err_data.c b/err_data.c
index de52cc0..ec391d5 100644
--- a/err_data.c
+++ b/err_data.c
@@ -55,188 +55,188 @@
 OPENSSL_STATIC_ASSERT(ERR_NUM_LIBS == 34, "number of libraries changed");
 
 const uint32_t kOpenSSLReasonValues[] = {
-    0xc320862,
-    0xc32887c,
-    0xc33088b,
-    0xc33889b,
-    0xc3408aa,
-    0xc3488c3,
-    0xc3508cf,
-    0xc3588ec,
-    0xc36090c,
-    0xc36891a,
-    0xc37092a,
-    0xc378937,
-    0xc380947,
-    0xc388952,
-    0xc390968,
-    0xc398977,
-    0xc3a098b,
-    0xc3a886f,
+    0xc320885,
+    0xc32889f,
+    0xc3308ae,
+    0xc3388be,
+    0xc3408cd,
+    0xc3488e6,
+    0xc3508f2,
+    0xc35890f,
+    0xc36092f,
+    0xc36893d,
+    0xc37094d,
+    0xc37895a,
+    0xc38096a,
+    0xc388975,
+    0xc39098b,
+    0xc39899a,
+    0xc3a09ae,
+    0xc3a8892,
     0xc3b00f7,
-    0xc3b88fe,
-    0x1032086f,
-    0x103295e5,
-    0x103315f1,
-    0x1033960a,
-    0x1034161d,
-    0x10348f4f,
-    0x10350c88,
-    0x10359630,
-    0x1036165a,
-    0x1036966d,
-    0x1037168c,
-    0x103796a5,
-    0x103816ba,
-    0x103896d8,
-    0x103916e7,
-    0x10399703,
-    0x103a171e,
-    0x103a972d,
-    0x103b1749,
-    0x103b9764,
-    0x103c178a,
+    0xc3b8921,
+    0x10320892,
+    0x10329608,
+    0x10331614,
+    0x1033962d,
+    0x10341640,
+    0x10348f72,
+    0x10350cab,
+    0x10359653,
+    0x1036167d,
+    0x10369690,
+    0x103716af,
+    0x103796c8,
+    0x103816dd,
+    0x103896fb,
+    0x1039170a,
+    0x10399726,
+    0x103a1741,
+    0x103a9750,
+    0x103b176c,
+    0x103b9787,
+    0x103c17ad,
     0x103c80f7,
-    0x103d179b,
-    0x103d97af,
-    0x103e17ce,
-    0x103e97dd,
-    0x103f17f4,
-    0x103f9807,
-    0x10400c4c,
-    0x1040981a,
-    0x10411838,
-    0x1041984b,
-    0x10421865,
-    0x10429875,
-    0x10431889,
-    0x1043989f,
-    0x104418b7,
-    0x104498cc,
-    0x104518e0,
-    0x104598f2,
-    0x10460625,
-    0x10468977,
-    0x10471907,
-    0x1047991e,
-    0x10481933,
-    0x10489941,
-    0x10490e9b,
-    0x1049977b,
-    0x104a1645,
-    0x14320c2f,
-    0x14328c3d,
-    0x14330c4c,
-    0x14338c5e,
+    0x103d17be,
+    0x103d97d2,
+    0x103e17f1,
+    0x103e9800,
+    0x103f1817,
+    0x103f982a,
+    0x10400c6f,
+    0x1040983d,
+    0x1041185b,
+    0x1041986e,
+    0x10421888,
+    0x10429898,
+    0x104318ac,
+    0x104398c2,
+    0x104418da,
+    0x104498ef,
+    0x10451903,
+    0x10459915,
+    0x10460635,
+    0x1046899a,
+    0x1047192a,
+    0x10479941,
+    0x10481956,
+    0x10489964,
+    0x10490ebe,
+    0x1049979e,
+    0x104a1668,
+    0x14320c52,
+    0x14328c60,
+    0x14330c6f,
+    0x14338c81,
     0x143400b9,
     0x143480f7,
     0x18320090,
-    0x18328fa5,
+    0x18328fc8,
     0x183300b9,
-    0x18338fbb,
-    0x18340fcf,
+    0x18338fde,
+    0x18340ff2,
     0x183480f7,
-    0x18350fee,
-    0x18359006,
-    0x1836101b,
-    0x1836902f,
-    0x18371067,
-    0x1837907d,
-    0x18381091,
-    0x183890a1,
-    0x18390a9d,
-    0x183990b1,
-    0x183a10d7,
-    0x183a90fd,
-    0x183b0ca7,
-    0x183b914c,
-    0x183c115e,
-    0x183c9169,
-    0x183d1179,
-    0x183d918a,
-    0x183e119b,
-    0x183e91ad,
-    0x183f11d6,
-    0x183f91ef,
-    0x18401207,
-    0x184086fd,
-    0x18411120,
-    0x184190eb,
-    0x1842110a,
-    0x18428c94,
-    0x184310c6,
-    0x18439132,
-    0x18440fe4,
-    0x18449053,
-    0x20321241,
-    0x2032922e,
-    0x2432124d,
-    0x243289bd,
-    0x2433125f,
-    0x2433926c,
-    0x24341279,
-    0x2434928b,
-    0x2435129a,
-    0x243592b7,
-    0x243612c4,
-    0x243692d2,
-    0x243712e0,
-    0x243792ee,
-    0x243812f7,
-    0x24389304,
-    0x24391317,
-    0x28320c7c,
-    0x28328ca7,
-    0x28330c4c,
-    0x28338cba,
-    0x28340c88,
+    0x18351011,
+    0x18359029,
+    0x1836103e,
+    0x18369052,
+    0x1837108a,
+    0x183790a0,
+    0x183810b4,
+    0x183890c4,
+    0x18390ac0,
+    0x183990d4,
+    0x183a10fa,
+    0x183a9120,
+    0x183b0cca,
+    0x183b916f,
+    0x183c1181,
+    0x183c918c,
+    0x183d119c,
+    0x183d91ad,
+    0x183e11be,
+    0x183e91d0,
+    0x183f11f9,
+    0x183f9212,
+    0x1840122a,
+    0x1840870d,
+    0x18411143,
+    0x1841910e,
+    0x1842112d,
+    0x18428cb7,
+    0x184310e9,
+    0x18439155,
+    0x18441007,
+    0x18449076,
+    0x20321264,
+    0x20329251,
+    0x24321270,
+    0x243289e0,
+    0x24331282,
+    0x2433928f,
+    0x2434129c,
+    0x243492ae,
+    0x243512bd,
+    0x243592da,
+    0x243612e7,
+    0x243692f5,
+    0x24371303,
+    0x24379311,
+    0x2438131a,
+    0x24389327,
+    0x2439133a,
+    0x28320c9f,
+    0x28328cca,
+    0x28330c6f,
+    0x28338cdd,
+    0x28340cab,
     0x283480b9,
     0x283500f7,
-    0x28358c94,
-    0x2c323284,
-    0x2c32932e,
-    0x2c333292,
-    0x2c33b2a4,
-    0x2c3432b8,
-    0x2c34b2ca,
-    0x2c3532e5,
-    0x2c35b2f7,
-    0x2c363327,
+    0x28358cb7,
+    0x2c3232a7,
+    0x2c329351,
+    0x2c3332b5,
+    0x2c33b2c7,
+    0x2c3432db,
+    0x2c34b2ed,
+    0x2c353308,
+    0x2c35b31a,
+    0x2c36334a,
     0x2c36833a,
-    0x2c373334,
-    0x2c37b360,
-    0x2c383385,
-    0x2c38b39c,
-    0x2c3933ba,
-    0x2c39b3ca,
-    0x2c3a33dc,
-    0x2c3ab3f0,
-    0x2c3b3401,
-    0x2c3bb420,
-    0x2c3c1340,
-    0x2c3c9356,
-    0x2c3d3465,
-    0x2c3d936f,
-    0x2c3e348f,
-    0x2c3eb49d,
-    0x2c3f34b5,
-    0x2c3fb4cd,
-    0x2c4034f7,
-    0x2c409241,
-    0x2c413508,
-    0x2c41b51b,
-    0x2c421207,
-    0x2c42b52c,
-    0x2c43074a,
-    0x2c43b412,
-    0x2c443373,
-    0x2c44b4da,
-    0x2c45330a,
-    0x2c45b346,
-    0x2c4633aa,
-    0x2c46b434,
-    0x2c473449,
-    0x2c47b482,
+    0x2c373357,
+    0x2c37b383,
+    0x2c3833a8,
+    0x2c38b3bf,
+    0x2c3933dd,
+    0x2c39b3ed,
+    0x2c3a33ff,
+    0x2c3ab413,
+    0x2c3b3424,
+    0x2c3bb443,
+    0x2c3c1363,
+    0x2c3c9379,
+    0x2c3d3488,
+    0x2c3d9392,
+    0x2c3e34b2,
+    0x2c3eb4c0,
+    0x2c3f34d8,
+    0x2c3fb4f0,
+    0x2c40351a,
+    0x2c409264,
+    0x2c41352b,
+    0x2c41b53e,
+    0x2c42122a,
+    0x2c42b54f,
+    0x2c43076d,
+    0x2c43b435,
+    0x2c443396,
+    0x2c44b4fd,
+    0x2c45332d,
+    0x2c45b369,
+    0x2c4633cd,
+    0x2c46b457,
+    0x2c47346c,
+    0x2c47b4a5,
     0x30320000,
     0x30328015,
     0x3033001f,
@@ -281,528 +281,530 @@
     0x3046833a,
     0x30470372,
     0x30478384,
-    0x30480392,
-    0x304883a3,
-    0x304903b2,
-    0x304983ca,
-    0x304a03dc,
-    0x304a83f0,
-    0x304b0408,
-    0x304b841b,
-    0x304c0426,
-    0x304c8437,
-    0x304d0443,
-    0x304d8459,
-    0x304e0467,
-    0x304e847d,
-    0x304f048f,
-    0x304f84a1,
-    0x305004c4,
-    0x305084d7,
-    0x305104e8,
-    0x305184f8,
-    0x30520510,
-    0x30528525,
-    0x3053053d,
-    0x30538551,
-    0x30540569,
-    0x30548582,
-    0x3055059b,
-    0x305585b8,
-    0x305605c3,
-    0x305685db,
-    0x305705eb,
-    0x305785fc,
-    0x3058060f,
-    0x30588625,
-    0x3059062e,
-    0x30598643,
-    0x305a0656,
-    0x305a8665,
-    0x305b0685,
-    0x305b8694,
-    0x305c06b5,
-    0x305c86d1,
-    0x305d06dd,
-    0x305d86fd,
-    0x305e0719,
-    0x305e872a,
-    0x305f0740,
-    0x305f874a,
-    0x306004b4,
+    0x304803a2,
+    0x304883b3,
+    0x304903c2,
+    0x304983da,
+    0x304a03ec,
+    0x304a8400,
+    0x304b0418,
+    0x304b842b,
+    0x304c0436,
+    0x304c8447,
+    0x304d0453,
+    0x304d8469,
+    0x304e0477,
+    0x304e848d,
+    0x304f049f,
+    0x304f84b1,
+    0x305004d4,
+    0x305084e7,
+    0x305104f8,
+    0x30518508,
+    0x30520520,
+    0x30528535,
+    0x3053054d,
+    0x30538561,
+    0x30540579,
+    0x30548592,
+    0x305505ab,
+    0x305585c8,
+    0x305605d3,
+    0x305685eb,
+    0x305705fb,
+    0x3057860c,
+    0x3058061f,
+    0x30588635,
+    0x3059063e,
+    0x30598653,
+    0x305a0666,
+    0x305a8675,
+    0x305b0695,
+    0x305b86a4,
+    0x305c06c5,
+    0x305c86e1,
+    0x305d06ed,
+    0x305d870d,
+    0x305e0729,
+    0x305e874d,
+    0x305f0763,
+    0x305f876d,
+    0x306004c4,
     0x3060804a,
     0x30610357,
-    0x34320b8d,
-    0x34328ba1,
-    0x34330bbe,
-    0x34338bd1,
-    0x34340be0,
-    0x34348c19,
-    0x34350bfd,
+    0x3061873a,
+    0x30620392,
+    0x34320bb0,
+    0x34328bc4,
+    0x34330be1,
+    0x34338bf4,
+    0x34340c03,
+    0x34348c3c,
+    0x34350c20,
     0x3c320090,
-    0x3c328ce4,
-    0x3c330cfd,
-    0x3c338d18,
-    0x3c340d35,
-    0x3c348d5f,
-    0x3c350d7a,
-    0x3c358da0,
-    0x3c360db9,
-    0x3c368dd1,
-    0x3c370de2,
-    0x3c378df0,
-    0x3c380dfd,
-    0x3c388e11,
-    0x3c390ca7,
-    0x3c398e34,
-    0x3c3a0e48,
-    0x3c3a8937,
-    0x3c3b0e58,
-    0x3c3b8e73,
-    0x3c3c0e85,
-    0x3c3c8eb8,
-    0x3c3d0ec2,
-    0x3c3d8ed6,
-    0x3c3e0ee4,
-    0x3c3e8f09,
-    0x3c3f0cd0,
-    0x3c3f8ef2,
+    0x3c328d07,
+    0x3c330d20,
+    0x3c338d3b,
+    0x3c340d58,
+    0x3c348d82,
+    0x3c350d9d,
+    0x3c358dc3,
+    0x3c360ddc,
+    0x3c368df4,
+    0x3c370e05,
+    0x3c378e13,
+    0x3c380e20,
+    0x3c388e34,
+    0x3c390cca,
+    0x3c398e57,
+    0x3c3a0e6b,
+    0x3c3a895a,
+    0x3c3b0e7b,
+    0x3c3b8e96,
+    0x3c3c0ea8,
+    0x3c3c8edb,
+    0x3c3d0ee5,
+    0x3c3d8ef9,
+    0x3c3e0f07,
+    0x3c3e8f2c,
+    0x3c3f0cf3,
+    0x3c3f8f15,
     0x3c4000b9,
     0x3c4080f7,
-    0x3c410d50,
-    0x3c418d8f,
-    0x3c420e9b,
-    0x3c428e25,
-    0x403219d3,
-    0x403299e9,
-    0x40331a17,
-    0x40339a21,
-    0x40341a38,
-    0x40349a56,
-    0x40351a66,
-    0x40359a78,
-    0x40361a85,
-    0x40369a91,
-    0x40371aa6,
-    0x40379ab8,
-    0x40381ac3,
-    0x40389ad5,
-    0x40390f4f,
-    0x40399ae5,
-    0x403a1af8,
-    0x403a9b19,
-    0x403b1b2a,
-    0x403b9b3a,
+    0x3c410d73,
+    0x3c418db2,
+    0x3c420ebe,
+    0x3c428e48,
+    0x403219f6,
+    0x40329a0c,
+    0x40331a3a,
+    0x40339a44,
+    0x40341a5b,
+    0x40349a79,
+    0x40351a89,
+    0x40359a9b,
+    0x40361aa8,
+    0x40369ab4,
+    0x40371ac9,
+    0x40379adb,
+    0x40381ae6,
+    0x40389af8,
+    0x40390f72,
+    0x40399b08,
+    0x403a1b1b,
+    0x403a9b3c,
+    0x403b1b4d,
+    0x403b9b5d,
     0x403c0071,
     0x403c8090,
-    0x403d1b9b,
-    0x403d9bb1,
-    0x403e1bc0,
-    0x403e9bf8,
-    0x403f1c12,
-    0x403f9c3a,
-    0x40401c4f,
-    0x40409c63,
-    0x40411c9e,
-    0x40419cb9,
-    0x40421cd2,
-    0x40429ce5,
-    0x40431cf9,
-    0x40439d27,
-    0x40441d3e,
+    0x403d1bbe,
+    0x403d9bd4,
+    0x403e1be3,
+    0x403e9c1b,
+    0x403f1c35,
+    0x403f9c5d,
+    0x40401c72,
+    0x40409c86,
+    0x40411cc1,
+    0x40419cdc,
+    0x40421cf5,
+    0x40429d08,
+    0x40431d1c,
+    0x40439d4a,
+    0x40441d61,
     0x404480b9,
-    0x40451d53,
-    0x40459d65,
-    0x40461d89,
-    0x40469da9,
-    0x40471db7,
-    0x40479dde,
-    0x40481e4f,
-    0x40489f09,
-    0x40491f20,
-    0x40499f3a,
-    0x404a1f51,
-    0x404a9f6f,
-    0x404b1f87,
-    0x404b9fb4,
-    0x404c1fca,
-    0x404c9fdc,
-    0x404d1ffd,
-    0x404da036,
-    0x404e204a,
-    0x404ea057,
-    0x404f20f1,
-    0x404fa167,
-    0x405021d6,
-    0x4050a1ea,
-    0x4051221d,
-    0x4052222d,
-    0x4052a251,
-    0x40532269,
-    0x4053a27c,
-    0x40542291,
-    0x4054a2b4,
-    0x405522df,
-    0x4055a31c,
-    0x40562341,
-    0x4056a35a,
-    0x40572372,
-    0x4057a385,
-    0x4058239a,
-    0x4058a3c1,
-    0x405923f0,
-    0x4059a41d,
-    0x405a2431,
-    0x405aa441,
-    0x405b2459,
-    0x405ba46a,
-    0x405c247d,
-    0x405ca4bc,
-    0x405d24c9,
-    0x405da4ee,
-    0x405e252c,
-    0x405e8adb,
-    0x405f254d,
-    0x405fa55a,
-    0x40602568,
-    0x4060a58a,
-    0x406125eb,
-    0x4061a623,
-    0x4062263a,
-    0x4062a64b,
-    0x40632698,
-    0x4063a6ad,
-    0x406426c4,
-    0x4064a6f0,
-    0x4065270b,
-    0x4065a722,
-    0x4066273a,
-    0x4066a764,
-    0x4067278f,
-    0x4067a7d4,
-    0x4068281c,
-    0x4068a83d,
-    0x4069286f,
-    0x4069a89d,
-    0x406a28be,
-    0x406aa8de,
-    0x406b2a66,
-    0x406baa89,
-    0x406c2a9f,
-    0x406cada9,
-    0x406d2dd8,
-    0x406dae00,
-    0x406e2e2e,
-    0x406eae7b,
-    0x406f2ed4,
-    0x406faf0c,
-    0x40702f1f,
-    0x4070af3c,
-    0x4071082a,
-    0x4071af4e,
-    0x40722f61,
-    0x4072af97,
-    0x40732faf,
-    0x40739540,
-    0x40742fc3,
-    0x4074afdd,
-    0x40752fee,
-    0x4075b002,
-    0x40763010,
-    0x40769304,
-    0x40773035,
-    0x4077b075,
-    0x40783090,
-    0x4078b0c9,
-    0x407930e0,
-    0x4079b0f6,
-    0x407a3122,
-    0x407ab135,
-    0x407b314a,
-    0x407bb15c,
-    0x407c318d,
-    0x407cb196,
-    0x407d2858,
-    0x407da18f,
-    0x407e30a5,
-    0x407ea3d1,
-    0x407f1dcb,
-    0x407f9f9e,
-    0x40802101,
-    0x40809df3,
-    0x4081223f,
-    0x4081a0a5,
-    0x40822e19,
-    0x40829b46,
-    0x408323ac,
-    0x4083a6d5,
-    0x40841e07,
-    0x4084a409,
-    0x4085248e,
-    0x4085a5b2,
-    0x4086250e,
-    0x4086a1a9,
-    0x40872e5f,
-    0x4087a600,
-    0x40881b84,
-    0x4088a7e7,
-    0x40891bd3,
-    0x40899b60,
-    0x408a2ad7,
-    0x408a9958,
-    0x408b3171,
-    0x408baee9,
-    0x408c249e,
-    0x408c9990,
-    0x408d1eef,
-    0x408d9e39,
-    0x408e201f,
-    0x408ea2fc,
-    0x408f27fb,
-    0x408fa5ce,
-    0x409027b0,
-    0x4090a4e0,
-    0x40912abf,
-    0x409199b6,
-    0x40921c20,
-    0x4092ae9a,
-    0x40932f7a,
-    0x4093a1ba,
-    0x40941e1b,
-    0x4094aaf0,
-    0x4095265c,
-    0x4095b102,
-    0x40962e46,
-    0x4096a11a,
-    0x40972205,
-    0x4097a06e,
-    0x40981c80,
-    0x4098a670,
-    0x40992eb6,
-    0x4099a329,
-    0x409a22c2,
-    0x409a9974,
-    0x409b1e75,
-    0x409b9ea0,
-    0x409c3057,
-    0x409c9ec8,
-    0x409d20d6,
-    0x409da0bb,
-    0x409e1d11,
-    0x409ea14f,
-    0x409f2137,
-    0x409f9e68,
-    0x40a02177,
-    0x40a0a088,
-    0x41f42991,
-    0x41f92a23,
-    0x41fe2916,
-    0x41feabcc,
-    0x41ff2cfa,
-    0x420329aa,
-    0x420829cc,
-    0x4208aa08,
-    0x420928fa,
-    0x4209aa42,
-    0x420a2951,
-    0x420aa931,
-    0x420b2971,
-    0x420ba9ea,
-    0x420c2d16,
-    0x420cab00,
-    0x420d2bb3,
-    0x420dabea,
-    0x42122c1d,
-    0x42172cdd,
-    0x4217ac5f,
-    0x421c2c81,
-    0x421f2c3c,
-    0x42212d8e,
-    0x42262cc0,
-    0x422b2d6c,
-    0x422bab8e,
-    0x422c2d4e,
-    0x422cab41,
-    0x422d2b1a,
-    0x422dad2d,
-    0x422e2b6d,
-    0x42302c9c,
-    0x4230ac04,
-    0x44320755,
-    0x44328764,
-    0x44330770,
-    0x4433877e,
-    0x44340791,
-    0x443487a2,
-    0x443507a9,
-    0x443587b3,
-    0x443607c6,
-    0x443687dc,
-    0x443707ee,
-    0x443787fb,
-    0x4438080a,
-    0x44388812,
-    0x4439082a,
-    0x44398838,
-    0x443a084b,
-    0x4832132e,
-    0x48329340,
-    0x48331356,
-    0x4833936f,
-    0x4c321394,
-    0x4c3293a4,
-    0x4c3313b7,
-    0x4c3393d7,
+    0x40451d76,
+    0x40459d88,
+    0x40461dac,
+    0x40469dcc,
+    0x40471dda,
+    0x40479e01,
+    0x40481e72,
+    0x40489f2c,
+    0x40491f43,
+    0x40499f5d,
+    0x404a1f74,
+    0x404a9f92,
+    0x404b1faa,
+    0x404b9fd7,
+    0x404c1fed,
+    0x404c9fff,
+    0x404d2020,
+    0x404da059,
+    0x404e206d,
+    0x404ea07a,
+    0x404f2114,
+    0x404fa18a,
+    0x405021f9,
+    0x4050a20d,
+    0x40512240,
+    0x40522250,
+    0x4052a274,
+    0x4053228c,
+    0x4053a29f,
+    0x405422b4,
+    0x4054a2d7,
+    0x40552302,
+    0x4055a33f,
+    0x40562364,
+    0x4056a37d,
+    0x40572395,
+    0x4057a3a8,
+    0x405823bd,
+    0x4058a3e4,
+    0x40592413,
+    0x4059a440,
+    0x405a2454,
+    0x405aa464,
+    0x405b247c,
+    0x405ba48d,
+    0x405c24a0,
+    0x405ca4df,
+    0x405d24ec,
+    0x405da511,
+    0x405e254f,
+    0x405e8afe,
+    0x405f2570,
+    0x405fa57d,
+    0x4060258b,
+    0x4060a5ad,
+    0x4061260e,
+    0x4061a646,
+    0x4062265d,
+    0x4062a66e,
+    0x406326bb,
+    0x4063a6d0,
+    0x406426e7,
+    0x4064a713,
+    0x4065272e,
+    0x4065a745,
+    0x4066275d,
+    0x4066a787,
+    0x406727b2,
+    0x4067a7f7,
+    0x4068283f,
+    0x4068a860,
+    0x40692892,
+    0x4069a8c0,
+    0x406a28e1,
+    0x406aa901,
+    0x406b2a89,
+    0x406baaac,
+    0x406c2ac2,
+    0x406cadcc,
+    0x406d2dfb,
+    0x406dae23,
+    0x406e2e51,
+    0x406eae9e,
+    0x406f2ef7,
+    0x406faf2f,
+    0x40702f42,
+    0x4070af5f,
+    0x4071084d,
+    0x4071af71,
+    0x40722f84,
+    0x4072afba,
+    0x40732fd2,
+    0x40739563,
+    0x40742fe6,
+    0x4074b000,
+    0x40753011,
+    0x4075b025,
+    0x40763033,
+    0x40769327,
+    0x40773058,
+    0x4077b098,
+    0x407830b3,
+    0x4078b0ec,
+    0x40793103,
+    0x4079b119,
+    0x407a3145,
+    0x407ab158,
+    0x407b316d,
+    0x407bb17f,
+    0x407c31b0,
+    0x407cb1b9,
+    0x407d287b,
+    0x407da1b2,
+    0x407e30c8,
+    0x407ea3f4,
+    0x407f1dee,
+    0x407f9fc1,
+    0x40802124,
+    0x40809e16,
+    0x40812262,
+    0x4081a0c8,
+    0x40822e3c,
+    0x40829b69,
+    0x408323cf,
+    0x4083a6f8,
+    0x40841e2a,
+    0x4084a42c,
+    0x408524b1,
+    0x4085a5d5,
+    0x40862531,
+    0x4086a1cc,
+    0x40872e82,
+    0x4087a623,
+    0x40881ba7,
+    0x4088a80a,
+    0x40891bf6,
+    0x40899b83,
+    0x408a2afa,
+    0x408a997b,
+    0x408b3194,
+    0x408baf0c,
+    0x408c24c1,
+    0x408c99b3,
+    0x408d1f12,
+    0x408d9e5c,
+    0x408e2042,
+    0x408ea31f,
+    0x408f281e,
+    0x408fa5f1,
+    0x409027d3,
+    0x4090a503,
+    0x40912ae2,
+    0x409199d9,
+    0x40921c43,
+    0x4092aebd,
+    0x40932f9d,
+    0x4093a1dd,
+    0x40941e3e,
+    0x4094ab13,
+    0x4095267f,
+    0x4095b125,
+    0x40962e69,
+    0x4096a13d,
+    0x40972228,
+    0x4097a091,
+    0x40981ca3,
+    0x4098a693,
+    0x40992ed9,
+    0x4099a34c,
+    0x409a22e5,
+    0x409a9997,
+    0x409b1e98,
+    0x409b9ec3,
+    0x409c307a,
+    0x409c9eeb,
+    0x409d20f9,
+    0x409da0de,
+    0x409e1d34,
+    0x409ea172,
+    0x409f215a,
+    0x409f9e8b,
+    0x40a0219a,
+    0x40a0a0ab,
+    0x41f429b4,
+    0x41f92a46,
+    0x41fe2939,
+    0x41feabef,
+    0x41ff2d1d,
+    0x420329cd,
+    0x420829ef,
+    0x4208aa2b,
+    0x4209291d,
+    0x4209aa65,
+    0x420a2974,
+    0x420aa954,
+    0x420b2994,
+    0x420baa0d,
+    0x420c2d39,
+    0x420cab23,
+    0x420d2bd6,
+    0x420dac0d,
+    0x42122c40,
+    0x42172d00,
+    0x4217ac82,
+    0x421c2ca4,
+    0x421f2c5f,
+    0x42212db1,
+    0x42262ce3,
+    0x422b2d8f,
+    0x422babb1,
+    0x422c2d71,
+    0x422cab64,
+    0x422d2b3d,
+    0x422dad50,
+    0x422e2b90,
+    0x42302cbf,
+    0x4230ac27,
+    0x44320778,
+    0x44328787,
+    0x44330793,
+    0x443387a1,
+    0x443407b4,
+    0x443487c5,
+    0x443507cc,
+    0x443587d6,
+    0x443607e9,
+    0x443687ff,
+    0x44370811,
+    0x4437881e,
+    0x4438082d,
+    0x44388835,
+    0x4439084d,
+    0x4439885b,
+    0x443a086e,
+    0x48321351,
+    0x48329363,
+    0x48331379,
+    0x48339392,
+    0x4c3213b7,
+    0x4c3293c7,
+    0x4c3313da,
+    0x4c3393fa,
     0x4c3400b9,
     0x4c3480f7,
-    0x4c3513e3,
-    0x4c3593f1,
-    0x4c36140d,
-    0x4c369433,
-    0x4c371442,
-    0x4c379450,
-    0x4c381465,
-    0x4c389471,
-    0x4c391491,
-    0x4c3994bb,
-    0x4c3a14d4,
-    0x4c3a94ed,
-    0x4c3b0625,
-    0x4c3b9506,
-    0x4c3c1518,
-    0x4c3c9527,
-    0x4c3d1540,
-    0x4c3d8c6f,
-    0x4c3e15ad,
-    0x4c3e954f,
-    0x4c3f15cf,
-    0x4c3f9304,
-    0x4c401565,
-    0x4c409380,
-    0x4c41159d,
-    0x4c419420,
-    0x4c421589,
-    0x5032353e,
-    0x5032b54d,
-    0x50333558,
-    0x5033b568,
-    0x50343581,
-    0x5034b59b,
-    0x503535a9,
-    0x5035b5bf,
-    0x503635d1,
-    0x5036b5e7,
-    0x50373600,
-    0x5037b613,
-    0x5038362b,
-    0x5038b63c,
-    0x50393651,
-    0x5039b665,
-    0x503a3685,
-    0x503ab69b,
-    0x503b36b3,
-    0x503bb6c5,
-    0x503c36e1,
-    0x503cb6f8,
-    0x503d3711,
-    0x503db727,
-    0x503e3734,
-    0x503eb74a,
-    0x503f375c,
-    0x503f83a3,
-    0x5040376f,
-    0x5040b77f,
-    0x50413799,
-    0x5041b7a8,
-    0x504237c2,
-    0x5042b7df,
-    0x504337ef,
-    0x5043b7ff,
-    0x5044381c,
-    0x50448459,
-    0x50453830,
-    0x5045b84e,
-    0x50463861,
-    0x5046b877,
-    0x50473889,
-    0x5047b89e,
-    0x504838c4,
-    0x5048b8d2,
-    0x504938e5,
-    0x5049b8fa,
-    0x504a3910,
-    0x504ab920,
-    0x504b3940,
-    0x504bb953,
-    0x504c3976,
-    0x504cb9a4,
-    0x504d39d1,
-    0x504db9ee,
-    0x504e3a09,
-    0x504eba25,
-    0x504f3a37,
-    0x504fba4e,
-    0x50503a5d,
-    0x50508719,
-    0x50513a70,
-    0x5051b80e,
-    0x505239b6,
-    0x58320f8d,
-    0x68320f4f,
-    0x68328ca7,
-    0x68330cba,
-    0x68338f5d,
-    0x68340f6d,
+    0x4c351406,
+    0x4c359414,
+    0x4c361430,
+    0x4c369456,
+    0x4c371465,
+    0x4c379473,
+    0x4c381488,
+    0x4c389494,
+    0x4c3914b4,
+    0x4c3994de,
+    0x4c3a14f7,
+    0x4c3a9510,
+    0x4c3b0635,
+    0x4c3b9529,
+    0x4c3c153b,
+    0x4c3c954a,
+    0x4c3d1563,
+    0x4c3d8c92,
+    0x4c3e15d0,
+    0x4c3e9572,
+    0x4c3f15f2,
+    0x4c3f9327,
+    0x4c401588,
+    0x4c4093a3,
+    0x4c4115c0,
+    0x4c419443,
+    0x4c4215ac,
+    0x50323561,
+    0x5032b570,
+    0x5033357b,
+    0x5033b58b,
+    0x503435a4,
+    0x5034b5be,
+    0x503535cc,
+    0x5035b5e2,
+    0x503635f4,
+    0x5036b60a,
+    0x50373623,
+    0x5037b636,
+    0x5038364e,
+    0x5038b65f,
+    0x50393674,
+    0x5039b688,
+    0x503a36a8,
+    0x503ab6be,
+    0x503b36d6,
+    0x503bb6e8,
+    0x503c3704,
+    0x503cb71b,
+    0x503d3734,
+    0x503db74a,
+    0x503e3757,
+    0x503eb76d,
+    0x503f377f,
+    0x503f83b3,
+    0x50403792,
+    0x5040b7a2,
+    0x504137bc,
+    0x5041b7cb,
+    0x504237e5,
+    0x5042b802,
+    0x50433812,
+    0x5043b822,
+    0x5044383f,
+    0x50448469,
+    0x50453853,
+    0x5045b871,
+    0x50463884,
+    0x5046b89a,
+    0x504738ac,
+    0x5047b8c1,
+    0x504838e7,
+    0x5048b8f5,
+    0x50493908,
+    0x5049b91d,
+    0x504a3933,
+    0x504ab943,
+    0x504b3963,
+    0x504bb976,
+    0x504c3999,
+    0x504cb9c7,
+    0x504d39f4,
+    0x504dba11,
+    0x504e3a2c,
+    0x504eba48,
+    0x504f3a5a,
+    0x504fba71,
+    0x50503a80,
+    0x50508729,
+    0x50513a93,
+    0x5051b831,
+    0x505239d9,
+    0x58320fb0,
+    0x68320f72,
+    0x68328cca,
+    0x68330cdd,
+    0x68338f80,
+    0x68340f90,
     0x683480f7,
-    0x6c320f15,
-    0x6c328c5e,
-    0x6c330f20,
-    0x6c338f39,
-    0x74320a43,
+    0x6c320f38,
+    0x6c328c81,
+    0x6c330f43,
+    0x6c338f5c,
+    0x74320a66,
     0x743280b9,
-    0x74330c6f,
-    0x783209a8,
-    0x783289bd,
-    0x783309c9,
+    0x74330c92,
+    0x783209cb,
+    0x783289e0,
+    0x783309ec,
     0x78338090,
-    0x783409d8,
-    0x783489ed,
-    0x78350a0c,
-    0x78358a2e,
-    0x78360a43,
-    0x78368a59,
-    0x78370a69,
-    0x78378a8a,
-    0x78380a9d,
-    0x78388aaf,
-    0x78390abc,
-    0x78398adb,
-    0x783a0af0,
-    0x783a8afe,
-    0x783b0b08,
-    0x783b8b1c,
-    0x783c0b33,
-    0x783c8b48,
-    0x783d0b5f,
-    0x783d8b74,
-    0x783e0aca,
-    0x783e8a7c,
-    0x7c32121d,
-    0x80321433,
+    0x783409fb,
+    0x78348a10,
+    0x78350a2f,
+    0x78358a51,
+    0x78360a66,
+    0x78368a7c,
+    0x78370a8c,
+    0x78378aad,
+    0x78380ac0,
+    0x78388ad2,
+    0x78390adf,
+    0x78398afe,
+    0x783a0b13,
+    0x783a8b21,
+    0x783b0b2b,
+    0x783b8b3f,
+    0x783c0b56,
+    0x783c8b6b,
+    0x783d0b82,
+    0x783d8b97,
+    0x783e0aed,
+    0x783e8a9f,
+    0x7c321240,
+    0x80321456,
     0x80328090,
-    0x80333253,
+    0x80333276,
     0x803380b9,
-    0x80343262,
-    0x8034b1ca,
-    0x803531e8,
-    0x8035b276,
-    0x8036322a,
-    0x8036b1d9,
-    0x8037321c,
-    0x8037b1b7,
-    0x8038323d,
-    0x8038b1f9,
-    0x8039320e,
+    0x80343285,
+    0x8034b1ed,
+    0x8035320b,
+    0x8035b299,
+    0x8036324d,
+    0x8036b1fc,
+    0x8037323f,
+    0x8037b1da,
+    0x80383260,
+    0x8038b21c,
+    0x80393231,
 };
 
 const size_t kOpenSSLReasonValuesLen = sizeof(kOpenSSLReasonValues) / sizeof(kOpenSSLReasonValues[0]);
@@ -854,6 +856,7 @@
     "INVALID_BIT_STRING_PADDING\0"
     "INVALID_BMPSTRING\0"
     "INVALID_DIGIT\0"
+    "INVALID_INTEGER\0"
     "INVALID_MODIFIER\0"
     "INVALID_NUMBER\0"
     "INVALID_OBJECT_ENCODING\0"
@@ -900,6 +903,7 @@
     "UNSUPPORTED_ANY_DEFINED_BY_TYPE\0"
     "UNSUPPORTED_PUBLIC_KEY_TYPE\0"
     "UNSUPPORTED_TYPE\0"
+    "WRONG_INTEGER_TYPE\0"
     "WRONG_PUBLIC_KEY_TYPE\0"
     "WRONG_TAG\0"
     "WRONG_TYPE\0"
diff --git a/eureka.mk b/eureka.mk
index 93d4437..9b500f8 100644
--- a/eureka.mk
+++ b/eureka.mk
@@ -20,7 +20,6 @@
   src/crypto/asn1/a_bool.c\
   src/crypto/asn1/a_d2i_fp.c\
   src/crypto/asn1/a_dup.c\
-  src/crypto/asn1/a_enum.c\
   src/crypto/asn1/a_gentm.c\
   src/crypto/asn1/a_i2d_fp.c\
   src/crypto/asn1/a_int.c\
diff --git a/sources.bp b/sources.bp
index 2f3e684..7e1293f 100644
--- a/sources.bp
+++ b/sources.bp
@@ -22,7 +22,6 @@
         "src/crypto/asn1/a_bool.c",
         "src/crypto/asn1/a_d2i_fp.c",
         "src/crypto/asn1/a_dup.c",
-        "src/crypto/asn1/a_enum.c",
         "src/crypto/asn1/a_gentm.c",
         "src/crypto/asn1/a_i2d_fp.c",
         "src/crypto/asn1/a_int.c",
diff --git a/sources.mk b/sources.mk
index 397432e..1f1d08f 100644
--- a/sources.mk
+++ b/sources.mk
@@ -20,7 +20,6 @@
   src/crypto/asn1/a_bool.c\
   src/crypto/asn1/a_d2i_fp.c\
   src/crypto/asn1/a_dup.c\
-  src/crypto/asn1/a_enum.c\
   src/crypto/asn1/a_gentm.c\
   src/crypto/asn1/a_i2d_fp.c\
   src/crypto/asn1/a_int.c\
diff --git a/src/.gitignore b/src/.gitignore
index 6cbc9d2..1a27c89 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -7,6 +7,7 @@
 *.swo
 doc/*.html
 doc/doc.css
+rust/Cargo.lock
 rust/target
 
 util/bot/android_ndk
diff --git a/src/BUILDING.md b/src/BUILDING.md
index 10645be..e9a2b0c 100644
--- a/src/BUILDING.md
+++ b/src/BUILDING.md
@@ -30,12 +30,10 @@
     by CMake, it may be configured explicitly by setting
     `CMAKE_ASM_NASM_COMPILER`.
 
-  * C and C++ compilers with C++11 support are required. On Windows, MSVC 14
-    (Visual Studio 2015) or later with Platform SDK 8.1 or later are supported,
-    but newer versions are recommended. We will drop support for Visual Studio
-    2015 in March 2022, five years after the release of Visual Studio 2017.
-    Recent versions of GCC (6.1+) and Clang should work on non-Windows
-    platforms, and maybe on Windows too.
+  * C and C++ compilers with C++11 support are required. On Windows, MSVC from
+    Visual Studio 2017 or later with Platform SDK 8.1 or later are supported,
+    but newer versions are recommended. Recent versions of GCC (6.1+) and Clang
+    should work on non-Windows platforms, and maybe on Windows too.
 
   * The most recent stable version of [Go](https://golang.org/dl/) is required.
     Note Go is exempt from the five year support window. If not found by CMake,
diff --git a/src/crypto/CMakeLists.txt b/src/crypto/CMakeLists.txt
index 6ab74b8..79802c6 100644
--- a/src/crypto/CMakeLists.txt
+++ b/src/crypto/CMakeLists.txt
@@ -203,7 +203,6 @@
   asn1/a_bool.c
   asn1/a_d2i_fp.c
   asn1/a_dup.c
-  asn1/a_enum.c
   asn1/a_gentm.c
   asn1/a_i2d_fp.c
   asn1/a_int.c
diff --git a/src/crypto/abi_self_test.cc b/src/crypto/abi_self_test.cc
index c48818b..9681498 100644
--- a/src/crypto/abi_self_test.cc
+++ b/src/crypto/abi_self_test.cc
@@ -58,9 +58,10 @@
 #if defined(OPENSSL_WINDOWS)
     // The invalid epilog makes Windows believe the epilog starts later than it
     // actually does. As a result, immediately after the popq, it does not
-    // realize the stack has been unwound and repeats the work.
-    EXPECT_NONFATAL_FAILURE(CHECK_ABI_SEH(abi_test_bad_unwind_epilog),
-                            "unwound past starting frame");
+    // realize the stack has been unwound and repeats the popq. This will result
+    // in reading the wrong return address and failing to unwind. The exact
+    // failure may vary depending on what was on the stack before.
+    EXPECT_NONFATAL_FAILURE(CHECK_ABI_SEH(abi_test_bad_unwind_epilog), "");
     CHECK_ABI_NO_UNWIND(abi_test_bad_unwind_epilog);
 #endif  // OPENSSL_WINDOWS
   }
diff --git a/src/crypto/asn1/a_enum.c b/src/crypto/asn1/a_enum.c
deleted file mode 100644
index d7a7357..0000000
--- a/src/crypto/asn1/a_enum.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young (eay@cryptsoft.com).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to.  The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson (tjh@cryptsoft.com).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    "This product includes cryptographic software written by
- *     Eric Young (eay@cryptsoft.com)"
- *    The word 'cryptographic' can be left out if the rouines from the library
- *    being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- *    the apps directory (application code) you must include an acknowledgement:
- *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed.  i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.] */
-
-#include <openssl/asn1.h>
-
-#include <limits.h>
-#include <string.h>
-
-#include <openssl/err.h>
-#include <openssl/mem.h>
-
-#include "../internal.h"
-
-
-/*
- * Code for ENUMERATED type: identical to INTEGER apart from a different tag.
- * for comments on encoding see a_int.c
- */
-
-int ASN1_ENUMERATED_set(ASN1_ENUMERATED *a, long v)
-{
-    int j, k;
-    unsigned int i;
-    unsigned char buf[sizeof(long) + 1];
-    long d;
-
-    a->type = V_ASN1_ENUMERATED;
-    if (a->length < (int)(sizeof(long) + 1)) {
-        if (a->data != NULL)
-            OPENSSL_free(a->data);
-        if ((a->data =
-             (unsigned char *)OPENSSL_malloc(sizeof(long) + 1)) != NULL)
-            OPENSSL_memset((char *)a->data, 0, sizeof(long) + 1);
-    }
-    if (a->data == NULL) {
-        OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
-        return (0);
-    }
-    d = v;
-    if (d < 0) {
-        d = -d;
-        a->type = V_ASN1_NEG_ENUMERATED;
-    }
-
-    for (i = 0; i < sizeof(long); i++) {
-        if (d == 0)
-            break;
-        buf[i] = (int)d & 0xff;
-        d >>= 8;
-    }
-    j = 0;
-    for (k = i - 1; k >= 0; k--)
-        a->data[j++] = buf[k];
-    a->length = j;
-    return (1);
-}
-
-long ASN1_ENUMERATED_get(const ASN1_ENUMERATED *a)
-{
-    int neg = 0, i;
-
-    if (a == NULL)
-        return (0L);
-    i = a->type;
-    if (i == V_ASN1_NEG_ENUMERATED)
-        neg = 1;
-    else if (i != V_ASN1_ENUMERATED)
-        return -1;
-
-    OPENSSL_STATIC_ASSERT(sizeof(uint64_t) >= sizeof(long),
-                          "long larger than uint64_t");
-
-    if (a->length > (int)sizeof(uint64_t)) {
-        /* hmm... a bit ugly */
-        return -1;
-    }
-
-    uint64_t r64 = 0;
-    if (a->data != NULL) {
-      for (i = 0; i < a->length; i++) {
-          r64 <<= 8;
-          r64 |= (unsigned char)a->data[i];
-      }
-
-      if (r64 > LONG_MAX) {
-          return -1;
-      }
-    }
-
-    long r = (long) r64;
-    if (neg)
-        r = -r;
-
-    return r;
-}
-
-ASN1_ENUMERATED *BN_to_ASN1_ENUMERATED(const BIGNUM *bn, ASN1_ENUMERATED *ai)
-{
-    ASN1_ENUMERATED *ret;
-    int len, j;
-
-    if (ai == NULL)
-        ret = ASN1_ENUMERATED_new();
-    else
-        ret = ai;
-    if (ret == NULL) {
-        OPENSSL_PUT_ERROR(ASN1, ASN1_R_NESTED_ASN1_ERROR);
-        goto err;
-    }
-    if (BN_is_negative(bn))
-        ret->type = V_ASN1_NEG_ENUMERATED;
-    else
-        ret->type = V_ASN1_ENUMERATED;
-    j = BN_num_bits(bn);
-    len = ((j == 0) ? 0 : ((j / 8) + 1));
-    if (ret->length < len + 4) {
-        unsigned char *new_data = OPENSSL_realloc(ret->data, len + 4);
-        if (!new_data) {
-            OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
-            goto err;
-        }
-        ret->data = new_data;
-    }
-
-    ret->length = BN_bn2bin(bn, ret->data);
-    return (ret);
- err:
-    if (ret != ai)
-        ASN1_ENUMERATED_free(ret);
-    return (NULL);
-}
-
-BIGNUM *ASN1_ENUMERATED_to_BN(const ASN1_ENUMERATED *ai, BIGNUM *bn)
-{
-    BIGNUM *ret;
-
-    if ((ret = BN_bin2bn(ai->data, ai->length, bn)) == NULL)
-        OPENSSL_PUT_ERROR(ASN1, ASN1_R_BN_LIB);
-    else if (ai->type == V_ASN1_NEG_ENUMERATED)
-        BN_set_negative(ret, 1);
-    return (ret);
-}
diff --git a/src/crypto/asn1/a_int.c b/src/crypto/asn1/a_int.c
index 1695fd0..512472a 100644
--- a/src/crypto/asn1/a_int.c
+++ b/src/crypto/asn1/a_int.c
@@ -59,8 +59,10 @@
 #include <string.h>
 #include <limits.h>
 
+#include <openssl/bytestring.h>
 #include <openssl/err.h>
 #include <openssl/mem.h>
+#include <openssl/type_check.h>
 
 #include "../internal.h"
 
@@ -72,129 +74,110 @@
 
 int ASN1_INTEGER_cmp(const ASN1_INTEGER *x, const ASN1_INTEGER *y)
 {
-    int neg, ret;
-    /* Compare signs */
-    neg = x->type & V_ASN1_NEG;
+    /* Compare signs. */
+    int neg = x->type & V_ASN1_NEG;
     if (neg != (y->type & V_ASN1_NEG)) {
-        if (neg)
-            return -1;
-        else
+        return neg ? -1 : 1;
+    }
+
+    int ret = ASN1_STRING_cmp(x, y);
+    if (neg) {
+        /* This could be |-ret|, but |ASN1_STRING_cmp| is not forbidden from
+         * returning |INT_MIN|. */
+        if (ret < 0) {
             return 1;
+        } else if (ret > 0) {
+            return -1;
+        } else {
+            return 0;
+        }
     }
 
-    ret = ASN1_STRING_cmp(x, y);
-
-    if (neg)
-        return -ret;
-    else
-        return ret;
+    return ret;
 }
 
-/*
- * This converts an ASN1 INTEGER into its content encoding.
- * The internal representation is an ASN1_STRING whose data is a big endian
- * representation of the value, ignoring the sign. The sign is determined by
- * the type: V_ASN1_INTEGER for positive and V_ASN1_NEG_INTEGER for negative.
- *
- * Positive integers are no problem: they are almost the same as the DER
- * encoding, except if the first byte is >= 0x80 we need to add a zero pad.
- *
- * Negative integers are a bit trickier...
- * The DER representation of negative integers is in 2s complement form.
- * The internal form is converted by complementing each octet and finally
- * adding one to the result. This can be done less messily with a little trick.
- * If the internal form has trailing zeroes then they will become FF by the
- * complement and 0 by the add one (due to carry) so just copy as many trailing
- * zeros to the destination as there are in the source. The carry will add one
- * to the last none zero octet: so complement this octet and add one and finally
- * complement any left over until you get to the start of the string.
- *
- * Padding is a little trickier too. If the first bytes is > 0x80 then we pad
- * with 0xff. However if the first byte is 0x80 and one of the following bytes
- * is non-zero we pad with 0xff. The reason for this distinction is that 0x80
- * followed by optional zeros isn't padded.
- */
-
-int i2c_ASN1_INTEGER(const ASN1_INTEGER *a, unsigned char **pp)
+/* negate_twos_complement negates |len| bytes from |buf| in-place, interpreted
+ * as a signed, big-endian two's complement value. */
+static void negate_twos_complement(uint8_t *buf, size_t len)
 {
-    int pad = 0, ret, i, neg;
-    unsigned char *p, *n, pb = 0;
-
-    if (a == NULL)
-        return (0);
-    neg = a->type & V_ASN1_NEG;
-    if (a->length == 0)
-        ret = 1;
-    else {
-        ret = a->length;
-        i = a->data[0];
-        if (ret == 1 && i == 0)
-            neg = 0;
-        if (!neg && (i > 127)) {
-            pad = 1;
-            pb = 0;
-        } else if (neg) {
-            if (i > 128) {
-                pad = 1;
-                pb = 0xFF;
-            } else if (i == 128) {
-                /*
-                 * Special case: if any other bytes non zero we pad:
-                 * otherwise we don't.
-                 */
-                for (i = 1; i < a->length; i++)
-                    if (a->data[i]) {
-                        pad = 1;
-                        pb = 0xFF;
-                        break;
-                    }
-            }
-        }
-        ret += pad;
+    uint8_t borrow = 0;
+    for (size_t i = len - 1; i < len; i--) {
+        uint8_t t = buf[i];
+        buf[i] = 0u - borrow - t;
+        borrow |= t != 0;
     }
-    if (pp == NULL)
-        return (ret);
-    p = *pp;
-
-    if (pad)
-        *(p++) = pb;
-    if (a->length == 0)
-        *(p++) = 0;
-    else if (!neg)
-        OPENSSL_memcpy(p, a->data, (unsigned int)a->length);
-    else {
-        /* Begin at the end of the encoding */
-        n = a->data + a->length - 1;
-        p += a->length - 1;
-        i = a->length;
-        /* Copy zeros to destination as long as source is zero */
-        while (!*n && i > 1) {
-            *(p--) = 0;
-            n--;
-            i--;
-        }
-        /* Complement and increment next octet */
-        *(p--) = ((*(n--)) ^ 0xff) + 1;
-        i--;
-        /* Complement any octets left */
-        for (; i > 0; i--)
-            *(p--) = *(n--) ^ 0xff;
-    }
-
-    *pp += ret;
-    return (ret);
 }
 
-/* Convert just ASN1 INTEGER content octets to ASN1_INTEGER structure */
+static int is_all_zeros(const uint8_t *in, size_t len) {
+    for (size_t i = 0; i < len; i++) {
+        if (in[i] != 0) {
+            return 0;
+        }
+    }
+    return 1;
+}
 
-ASN1_INTEGER *c2i_ASN1_INTEGER(ASN1_INTEGER **a, const unsigned char **pp,
+int i2c_ASN1_INTEGER(const ASN1_INTEGER *in, unsigned char **outp)
+{
+    if (in == NULL) {
+        return 0;
+    }
+
+    /* |ASN1_INTEGER|s should be represented minimally, but it is possible to
+     * construct invalid ones. Skip leading zeros so this does not produce an
+     * invalid encoding or break invariants. */
+    int start = 0;
+    while (start < in->length && in->data[start] == 0) {
+        start++;
+    }
+
+    int is_negative = (in->type & V_ASN1_NEG) != 0;
+    int pad;
+    if (start >= in->length) {
+        /* Zero is represented as a single byte. */
+        is_negative = 0;
+        pad = 1;
+    } else if (is_negative) {
+        /* 0x80...01 through 0xff...ff have a two's complement of 0x7f...ff
+         * through 0x00...01 and need an extra byte to be negative.
+         * 0x01...00 through 0x80...00 have a two's complement of 0xfe...ff
+         * through 0x80...00 and can be negated as-is. */
+        pad = in->data[start] > 0x80 ||
+              (in->data[start] == 0x80 &&
+               !is_all_zeros(in->data + start + 1, in->length - start - 1));
+    } else {
+        /* If the high bit is set, the signed representation needs an extra
+         * byte to be positive. */
+        pad = (in->data[start] & 0x80) != 0;
+    }
+
+    if (in->length - start > INT_MAX - pad) {
+        OPENSSL_PUT_ERROR(ASN1, ERR_R_OVERFLOW);
+        return 0;
+    }
+    int len = pad + in->length - start;
+    assert(len > 0);
+    if (outp == NULL) {
+        return len;
+    }
+
+    if (pad) {
+        (*outp)[0] = 0;
+    }
+    OPENSSL_memcpy(*outp + pad, in->data + start, in->length - start);
+    if (is_negative) {
+        negate_twos_complement(*outp, len);
+        assert((*outp)[0] >= 0x80);
+    } else {
+        assert((*outp)[0] < 0x80);
+    }
+    *outp += len;
+    return len;
+}
+
+ASN1_INTEGER *c2i_ASN1_INTEGER(ASN1_INTEGER **out, const unsigned char **inp,
                                long len)
 {
-    ASN1_INTEGER *ret = NULL;
-    const unsigned char *p, *pend;
-    unsigned char *to, *s;
-    int i;
-
     /*
      * This function can handle lengths up to INT_MAX - 1, but the rest of the
      * legacy ASN.1 code mixes integer types, so avoid exposing it to
@@ -205,85 +188,69 @@
         return NULL;
     }
 
-    if ((a == NULL) || ((*a) == NULL)) {
-        if ((ret = ASN1_INTEGER_new()) == NULL)
-            return (NULL);
-        ret->type = V_ASN1_INTEGER;
-    } else
-        ret = (*a);
-
-    p = *pp;
-    pend = p + len;
-
-    /*
-     * We must OPENSSL_malloc stuff, even for 0 bytes otherwise it signifies
-     * a missing NULL parameter.
-     */
-    s = (unsigned char *)OPENSSL_malloc((int)len + 1);
-    if (s == NULL) {
-        i = ERR_R_MALLOC_FAILURE;
-        goto err;
+    CBS cbs;
+    CBS_init(&cbs, *inp, (size_t)len);
+    int is_negative;
+    if (!CBS_is_valid_asn1_integer(&cbs, &is_negative)) {
+        OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_INTEGER);
+        return NULL;
     }
-    to = s;
-    if (!len) {
-        /*
-         * Strictly speaking this is an illegal INTEGER but we tolerate it.
-         */
-        ret->type = V_ASN1_INTEGER;
-    } else if (*p & 0x80) {     /* a negative number */
-        ret->type = V_ASN1_NEG_INTEGER;
-        if ((*p == 0xff) && (len != 1)) {
-            p++;
-            len--;
-        }
-        i = len;
-        p += i - 1;
-        to += i - 1;
-        while ((!*p) && i) {
-            *(to--) = 0;
-            i--;
-            p--;
-        }
-        /*
-         * Special case: if all zeros then the number will be of the form FF
-         * followed by n zero bytes: this corresponds to 1 followed by n zero
-         * bytes. We've already written n zeros so we just append an extra
-         * one and set the first byte to a 1. This is treated separately
-         * because it is the only case where the number of bytes is larger
-         * than len.
-         */
-        if (!i) {
-            *s = 1;
-            s[len] = 0;
-            len++;
-        } else {
-            *(to--) = (*(p--) ^ 0xff) + 1;
-            i--;
-            for (; i > 0; i--)
-                *(to--) = *(p--) ^ 0xff;
+
+    ASN1_INTEGER *ret = NULL;
+    if (out == NULL || *out == NULL) {
+        ret = ASN1_INTEGER_new();
+        if (ret == NULL) {
+            return NULL;
         }
     } else {
-        ret->type = V_ASN1_INTEGER;
-        if ((*p == 0) && (len != 1)) {
-            p++;
-            len--;
-        }
-        OPENSSL_memcpy(s, p, (int)len);
+        ret = *out;
     }
 
-    if (ret->data != NULL)
-        OPENSSL_free(ret->data);
-    ret->data = s;
-    ret->length = (int)len;
-    if (a != NULL)
-        (*a) = ret;
-    *pp = pend;
-    return (ret);
+    /* Convert to |ASN1_INTEGER|'s sign-and-magnitude representation. First,
+     * determine the size needed for a minimal result. */
+    if (is_negative) {
+        /* 0xff00...01 through 0xff7f..ff have a two's complement of 0x00ff...ff
+         * through 0x000100...001 and need one leading zero removed. 0x8000...00
+         * through 0xff00...00 have a two's complement of 0x8000...00 through
+         * 0x0100...00 and will be minimally-encoded as-is. */
+        if (CBS_len(&cbs) > 0 && CBS_data(&cbs)[0] == 0xff &&
+            !is_all_zeros(CBS_data(&cbs) + 1, CBS_len(&cbs) - 1)) {
+            CBS_skip(&cbs, 1);
+        }
+    } else {
+        /* Remove the leading zero byte, if any. */
+        if (CBS_len(&cbs) > 0 && CBS_data(&cbs)[0] == 0x00) {
+            CBS_skip(&cbs, 1);
+        }
+    }
+
+    if (!ASN1_STRING_set(ret, CBS_data(&cbs), CBS_len(&cbs))) {
+        goto err;
+    }
+
+    if (is_negative) {
+        ret->type = V_ASN1_NEG_INTEGER;
+        negate_twos_complement(ret->data, ret->length);
+    } else {
+        ret->type = V_ASN1_INTEGER;
+    }
+
+    /* The value should be minimally-encoded. */
+    assert(ret->length == 0 || ret->data[0] != 0);
+    /* Zero is not negative. */
+    assert(!is_negative || ret->length > 0);
+
+    *inp += len;
+    if (out != NULL) {
+        *out = ret;
+    }
+    return ret;
+
  err:
-    OPENSSL_PUT_ERROR(ASN1, i);
-    if ((ret != NULL) && ((a == NULL) || (*a != ret)))
+    if (ret != NULL && (out == NULL || *out != ret)) {
         ASN1_INTEGER_free(ret);
-    return (NULL);
+    }
+    return NULL;
 }
 
 int ASN1_INTEGER_set(ASN1_INTEGER *a, long v)
@@ -300,121 +267,196 @@
     return 1;
 }
 
-int ASN1_INTEGER_set_uint64(ASN1_INTEGER *out, uint64_t v)
+int ASN1_ENUMERATED_set(ASN1_ENUMERATED *a, long v)
 {
-    uint8_t *const newdata = OPENSSL_malloc(sizeof(uint64_t));
-    if (newdata == NULL) {
-        OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
+    if (v >= 0) {
+        return ASN1_ENUMERATED_set_uint64(a, (uint64_t) v);
+    }
+
+    if (!ASN1_ENUMERATED_set_uint64(a, 0 - (uint64_t) v)) {
         return 0;
     }
 
-    OPENSSL_free(out->data);
-    out->data = newdata;
-    v = CRYPTO_bswap8(v);
-    memcpy(out->data, &v, sizeof(v));
+    a->type = V_ASN1_NEG_ENUMERATED;
+    return 1;
+}
 
-    out->type = V_ASN1_INTEGER;
-
+static int asn1_string_set_uint64(ASN1_STRING *out, uint64_t v, int type)
+{
+    uint8_t buf[sizeof(uint64_t)];
+    CRYPTO_store_u64_be(buf, v);
     size_t leading_zeros;
-    for (leading_zeros = 0; leading_zeros < sizeof(uint64_t) - 1;
-         leading_zeros++) {
-        if (out->data[leading_zeros] != 0) {
-            break;
-        }
+    for (leading_zeros = 0; leading_zeros < sizeof(buf); leading_zeros++) {
+      if (buf[leading_zeros] != 0) {
+        break;
+      }
     }
 
-    out->length = sizeof(uint64_t) - leading_zeros;
-    OPENSSL_memmove(out->data, out->data + leading_zeros, out->length);
-
+    if (!ASN1_STRING_set(out, buf + leading_zeros,
+                         sizeof(buf) - leading_zeros)) {
+        return 0;
+    }
+    out->type = type;
     return 1;
 }
 
+int ASN1_INTEGER_set_uint64(ASN1_INTEGER *out, uint64_t v)
+{
+    return asn1_string_set_uint64(out, v, V_ASN1_INTEGER);
+}
+
+int ASN1_ENUMERATED_set_uint64(ASN1_ENUMERATED *out, uint64_t v)
+{
+    return asn1_string_set_uint64(out, v, V_ASN1_ENUMERATED);
+}
+
+static int asn1_string_get_abs_uint64(uint64_t *out, const ASN1_STRING *a,
+                                      int type)
+{
+    if ((a->type & ~V_ASN1_NEG) != type) {
+        OPENSSL_PUT_ERROR(ASN1, ASN1_R_WRONG_INTEGER_TYPE);
+        return 0;
+    }
+    uint8_t buf[sizeof(uint64_t)] = {0};
+    if (a->length > (int)sizeof(buf)) {
+        OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_INTEGER);
+        return 0;
+    }
+    OPENSSL_memcpy(buf + sizeof(buf) - a->length, a->data, a->length);
+    *out = CRYPTO_load_u64_be(buf);
+    return 1;
+}
+
+static int asn1_string_get_uint64(uint64_t *out, const ASN1_STRING *a, int type)
+{
+    if (!asn1_string_get_abs_uint64(out, a, type)) {
+        return 0;
+    }
+    if (a->type & V_ASN1_NEG) {
+        OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_INTEGER);
+        return 0;
+    }
+    return 1;
+}
+
+int ASN1_INTEGER_get_uint64(uint64_t *out, const ASN1_INTEGER *a)
+{
+    return asn1_string_get_uint64(out, a, V_ASN1_INTEGER);
+}
+
+int ASN1_ENUMERATED_get_uint64(uint64_t *out, const ASN1_ENUMERATED *a)
+{
+    return asn1_string_get_uint64(out, a, V_ASN1_ENUMERATED);
+}
+
+static long asn1_string_get_long(const ASN1_STRING *a, int type)
+{
+    if (a == NULL) {
+        return 0;
+    }
+
+    uint64_t v;
+    if (!asn1_string_get_abs_uint64(&v, a, type)) {
+        goto err;
+    }
+
+    int64_t i64;
+    int fits_in_i64;
+    /* Check |v != 0| to handle manually-constructed negative zeros. */
+    if ((a->type & V_ASN1_NEG) && v != 0) {
+        i64 = (int64_t)(0u - v);
+        fits_in_i64 = i64 < 0;
+    } else {
+        i64 = (int64_t)v;
+        fits_in_i64 = i64 >= 0;
+    }
+    OPENSSL_STATIC_ASSERT(sizeof(long) <= sizeof(int64_t), "long is too big");
+
+    if (fits_in_i64 && LONG_MIN <= i64 && i64 <= LONG_MAX) {
+        return (long)i64;
+    }
+
+err:
+    /* This function's return value does not distinguish overflow from -1. */
+    ERR_clear_error();
+    return -1;
+}
+
 long ASN1_INTEGER_get(const ASN1_INTEGER *a)
 {
-    int neg = 0, i;
-
-    if (a == NULL)
-        return (0L);
-    i = a->type;
-    if (i == V_ASN1_NEG_INTEGER)
-        neg = 1;
-    else if (i != V_ASN1_INTEGER)
-        return -1;
-
-    OPENSSL_STATIC_ASSERT(sizeof(uint64_t) >= sizeof(long),
-                          "long larger than uint64_t");
-
-    if (a->length > (int)sizeof(uint64_t)) {
-        /* hmm... a bit ugly, return all ones */
-        return -1;
-    }
-
-    uint64_t r64 = 0;
-    if (a->data != NULL) {
-      for (i = 0; i < a->length; i++) {
-          r64 <<= 8;
-          r64 |= (unsigned char)a->data[i];
-      }
-
-      if (r64 > LONG_MAX) {
-          return -1;
-      }
-    }
-
-    long r = (long) r64;
-    if (neg)
-        r = -r;
-
-    return r;
+    return asn1_string_get_long(a, V_ASN1_INTEGER);
 }
 
-ASN1_INTEGER *BN_to_ASN1_INTEGER(const BIGNUM *bn, ASN1_INTEGER *ai)
+long ASN1_ENUMERATED_get(const ASN1_ENUMERATED *a)
+{
+    return asn1_string_get_long(a, V_ASN1_ENUMERATED);
+}
+
+static ASN1_STRING *bn_to_asn1_string(const BIGNUM *bn, ASN1_STRING *ai,
+                                      int type)
 {
     ASN1_INTEGER *ret;
-    int len, j;
-
-    if (ai == NULL)
-        ret = ASN1_INTEGER_new();
-    else
+    if (ai == NULL) {
+        ret = ASN1_STRING_type_new(type);
+    } else {
         ret = ai;
+    }
     if (ret == NULL) {
         OPENSSL_PUT_ERROR(ASN1, ASN1_R_NESTED_ASN1_ERROR);
         goto err;
     }
-    if (BN_is_negative(bn) && !BN_is_zero(bn))
-        ret->type = V_ASN1_NEG_INTEGER;
-    else
-        ret->type = V_ASN1_INTEGER;
-    j = BN_num_bits(bn);
-    len = ((j == 0) ? 0 : ((j / 8) + 1));
-    if (ret->length < len + 4) {
-        unsigned char *new_data = OPENSSL_realloc(ret->data, len + 4);
-        if (!new_data) {
-            OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
-            goto err;
-        }
-        ret->data = new_data;
+
+    if (BN_is_negative(bn) && !BN_is_zero(bn)) {
+        ret->type = type | V_ASN1_NEG;
+    } else {
+        ret->type = type;
     }
-    ret->length = BN_bn2bin(bn, ret->data);
-    /* Correct zero case */
-    if (!ret->length) {
-        ret->data[0] = 0;
-        ret->length = 1;
+
+    int len = BN_num_bytes(bn);
+    if (!ASN1_STRING_set(ret, NULL, len) ||
+        !BN_bn2bin_padded(ret->data, len, bn)) {
+        goto err;
     }
-    return (ret);
+    return ret;
+
  err:
-    if (ret != ai)
-        ASN1_INTEGER_free(ret);
-    return (NULL);
+    if (ret != ai) {
+        ASN1_STRING_free(ret);
+    }
+    return NULL;
+}
+
+ASN1_INTEGER *BN_to_ASN1_INTEGER(const BIGNUM *bn, ASN1_INTEGER *ai)
+{
+    return bn_to_asn1_string(bn, ai, V_ASN1_INTEGER);
+}
+
+ASN1_ENUMERATED *BN_to_ASN1_ENUMERATED(const BIGNUM *bn, ASN1_ENUMERATED *ai)
+{
+    return bn_to_asn1_string(bn, ai, V_ASN1_ENUMERATED);
+}
+
+static BIGNUM *asn1_string_to_bn(const ASN1_STRING *ai, BIGNUM *bn, int type)
+{
+    if ((ai->type & ~V_ASN1_NEG) != type) {
+        OPENSSL_PUT_ERROR(ASN1, ASN1_R_WRONG_INTEGER_TYPE);
+        return NULL;
+    }
+
+    BIGNUM *ret;
+    if ((ret = BN_bin2bn(ai->data, ai->length, bn)) == NULL)
+        OPENSSL_PUT_ERROR(ASN1, ASN1_R_BN_LIB);
+    else if (ai->type & V_ASN1_NEG)
+        BN_set_negative(ret, 1);
+    return (ret);
 }
 
 BIGNUM *ASN1_INTEGER_to_BN(const ASN1_INTEGER *ai, BIGNUM *bn)
 {
-    BIGNUM *ret;
+    return asn1_string_to_bn(ai, bn, V_ASN1_INTEGER);
+}
 
-    if ((ret = BN_bin2bn(ai->data, ai->length, bn)) == NULL)
-        OPENSSL_PUT_ERROR(ASN1, ASN1_R_BN_LIB);
-    else if (ai->type == V_ASN1_NEG_INTEGER)
-        BN_set_negative(ret, 1);
-    return (ret);
+BIGNUM *ASN1_ENUMERATED_to_BN(const ASN1_ENUMERATED *ai, BIGNUM *bn)
+{
+    return asn1_string_to_bn(ai, bn, V_ASN1_ENUMERATED);
 }
diff --git a/src/crypto/asn1/a_strex.c b/src/crypto/asn1/a_strex.c
index 7829d67..3732894 100644
--- a/src/crypto/asn1/a_strex.c
+++ b/src/crypto/asn1/a_strex.c
@@ -574,7 +574,8 @@
 // their value, updates |v| and |len|, and returns one. Otherwise, returns
 // zero.
 static int consume_two_digits(int* out, const char **v, int *len) {
-  if (*len < 2|| !isdigit((*v)[0]) || !isdigit((*v)[1])) {
+  if (*len < 2 || !isdigit((unsigned char)((*v)[0])) ||
+      !isdigit((unsigned char)((*v)[1]))) {
     return 0;
   }
   *out = ((*v)[0] - '0') * 10 + ((*v)[1] - '0');
diff --git a/src/crypto/asn1/asn1_lib.c b/src/crypto/asn1/asn1_lib.c
index fbf4d68..edf5e7c 100644
--- a/src/crypto/asn1/asn1_lib.c
+++ b/src/crypto/asn1/asn1_lib.c
@@ -59,7 +59,7 @@
 #include <limits.h>
 #include <string.h>
 
-#include <openssl/asn1_mac.h>
+#include <openssl/bytestring.h>
 #include <openssl/err.h>
 #include <openssl/mem.h>
 
@@ -104,101 +104,54 @@
 OPENSSL_DECLARE_ERROR_REASON(ASN1, UNKNOWN_TAG)
 OPENSSL_DECLARE_ERROR_REASON(ASN1, UNSUPPORTED_TYPE)
 
-static int asn1_get_length(const unsigned char **pp, long *rl, long max);
 static void asn1_put_length(unsigned char **pp, int length);
 
-int ASN1_get_object(const unsigned char **pp, long *plength, int *ptag,
-                    int *pclass, long omax)
+int ASN1_get_object(const unsigned char **inp, long *out_len, int *out_tag,
+                    int *out_class, long in_len)
 {
-    int i, ret;
-    long l;
-    const unsigned char *p = *pp;
-    int tag, xclass;
-    long max = omax;
-
-    if (!max)
-        goto err;
-    ret = (*p & V_ASN1_CONSTRUCTED);
-    xclass = (*p & V_ASN1_PRIVATE);
-    i = *p & V_ASN1_PRIMITIVE_TAG;
-    if (i == V_ASN1_PRIMITIVE_TAG) { /* high-tag */
-        p++;
-        if (--max == 0)
-            goto err;
-        l = 0;
-        while (*p & 0x80) {
-            l <<= 7L;
-            l |= *(p++) & 0x7f;
-            if (--max == 0)
-                goto err;
-            if (l > (INT_MAX >> 7L))
-                goto err;
-        }
-        l <<= 7L;
-        l |= *(p++) & 0x7f;
-        tag = (int)l;
-        if (--max == 0)
-            goto err;
-    } else {
-        tag = i;
-        p++;
-        if (--max == 0)
-            goto err;
-    }
-
-    /* To avoid ambiguity with V_ASN1_NEG, impose a limit on universal tags. */
-    if (xclass == V_ASN1_UNIVERSAL && tag > V_ASN1_MAX_UNIVERSAL)
-        goto err;
-
-    *ptag = tag;
-    *pclass = xclass;
-    if (!asn1_get_length(&p, plength, max))
-        goto err;
-
-    if (*plength > (omax - (p - *pp))) {
-        OPENSSL_PUT_ERROR(ASN1, ASN1_R_TOO_LONG);
+    if (in_len < 0) {
+        OPENSSL_PUT_ERROR(ASN1, ASN1_R_HEADER_TOO_LONG);
         return 0x80;
     }
-    *pp = p;
-    return ret;
- err:
-    OPENSSL_PUT_ERROR(ASN1, ASN1_R_HEADER_TOO_LONG);
-    return 0x80;
-}
 
-static int asn1_get_length(const unsigned char **pp, long *rl, long max)
-{
-    const unsigned char *p = *pp;
-    unsigned long ret = 0;
-    unsigned long i;
+    /* TODO(https://crbug.com/boringssl/354): This should use |CBS_get_asn1| to
+     * reject non-minimal lengths, which are only allowed in BER. However,
+     * Android sometimes needs to allow a non-minimal length in certificate
+     * signature fields (see b/18228011). Make this only apply to that field,
+     * while requiring DER elsewhere. Better yet, it should be limited to a
+     * preprocessing step in that part of Android. */
+    unsigned tag;
+    size_t header_len;
+    int indefinite;
+    CBS cbs, body;
+    CBS_init(&cbs, *inp, (size_t)in_len);
+    if (!CBS_get_any_ber_asn1_element(&cbs, &body, &tag, &header_len,
+        /*out_ber_found=*/NULL, &indefinite) ||
+        indefinite ||
+        !CBS_skip(&body, header_len) ||
+        /* Bound the length to comfortably fit in an int. Lengths in this
+         * module often switch between int and long without overflow checks. */
+        CBS_len(&body) > INT_MAX / 2) {
+        OPENSSL_PUT_ERROR(ASN1, ASN1_R_HEADER_TOO_LONG);
+        return 0x80;
+    }
 
-    if (max-- < 1) {
-        return 0;
+    /* Convert between tag representations. */
+    int tag_class = (tag & CBS_ASN1_CLASS_MASK) >> CBS_ASN1_TAG_SHIFT;
+    int constructed = (tag & CBS_ASN1_CONSTRUCTED) >> CBS_ASN1_TAG_SHIFT;
+    int tag_number = tag & CBS_ASN1_TAG_NUMBER_MASK;
+
+    /* To avoid ambiguity with V_ASN1_NEG, impose a limit on universal tags. */
+    if (tag_class == V_ASN1_UNIVERSAL && tag_number > V_ASN1_MAX_UNIVERSAL) {
+        OPENSSL_PUT_ERROR(ASN1, ASN1_R_HEADER_TOO_LONG);
+        return 0x80;
     }
-    if (*p == 0x80) {
-        /* We do not support BER indefinite-length encoding. */
-        return 0;
-    }
-    i = *p & 0x7f;
-    if (*(p++) & 0x80) {
-        if (i > sizeof(ret) || max < (long)i)
-            return 0;
-        while (i-- > 0) {
-            ret <<= 8L;
-            ret |= *(p++);
-        }
-    } else {
-        ret = i;
-    }
-    /*
-     * Bound the length to comfortably fit in an int. Lengths in this module
-     * often switch between int and long without overflow checks.
-     */
-    if (ret > INT_MAX / 2)
-        return 0;
-    *pp = p;
-    *rl = (long)ret;
-    return 1;
+
+    *inp = CBS_data(&body);
+    *out_len = CBS_len(&body);
+    *out_tag = tag_number;
+    *out_class = tag_class;
+    return constructed;
 }
 
 /*
diff --git a/src/crypto/asn1/asn1_test.cc b/src/crypto/asn1/asn1_test.cc
index ab9cb01..7d69889 100644
--- a/src/crypto/asn1/asn1_test.cc
+++ b/src/crypto/asn1/asn1_test.cc
@@ -15,6 +15,7 @@
 #include <limits.h>
 #include <stdio.h>
 
+#include <map>
 #include <string>
 #include <vector>
 
@@ -76,33 +77,6 @@
                                     obj->value.asn1_string->length));
 }
 
-TEST(ASN1Test, IntegerSetting) {
-  bssl::UniquePtr<ASN1_INTEGER> by_bn(ASN1_INTEGER_new());
-  bssl::UniquePtr<ASN1_INTEGER> by_long(ASN1_INTEGER_new());
-  bssl::UniquePtr<ASN1_INTEGER> by_uint64(ASN1_INTEGER_new());
-  bssl::UniquePtr<BIGNUM> bn(BN_new());
-
-  const std::vector<int64_t> kValues = {
-      LONG_MIN, -2, -1, 0, 1, 2, 0xff, 0x100, 0xffff, 0x10000, LONG_MAX,
-  };
-  for (const auto &i : kValues) {
-    SCOPED_TRACE(i);
-
-    ASSERT_EQ(1, ASN1_INTEGER_set(by_long.get(), i));
-    const uint64_t abs = i < 0 ? (0 - (uint64_t) i) : i;
-    ASSERT_TRUE(BN_set_u64(bn.get(), abs));
-    BN_set_negative(bn.get(), i < 0);
-    ASSERT_TRUE(BN_to_ASN1_INTEGER(bn.get(), by_bn.get()));
-
-    EXPECT_EQ(0, ASN1_INTEGER_cmp(by_bn.get(), by_long.get()));
-
-    if (i >= 0) {
-      ASSERT_EQ(1, ASN1_INTEGER_set_uint64(by_uint64.get(), i));
-      EXPECT_EQ(0, ASN1_INTEGER_cmp(by_bn.get(), by_uint64.get()));
-    }
-  }
-}
-
 // |obj| and |i2d_func| require different template parameters because C++ may
 // deduce, say, |ASN1_STRING*| via |obj| and |const ASN1_STRING*| via
 // |i2d_func|. Template argument deduction then fails. The language is not able
@@ -131,6 +105,391 @@
   EXPECT_EQ(Bytes(expected), Bytes(buf));
 }
 
+// Returns a newly-allocated |BIGNUM| equal to 2^|bit|, or nullptr on failure.
+static bssl::UniquePtr<BIGNUM> BIGNUMPow2(unsigned bit) {
+  bssl::UniquePtr<BIGNUM> pow2(BN_new());
+  if (pow2 && BN_set_bit(pow2.get(), bit)) {
+    return pow2;
+  }
+  return nullptr;
+}
+
+TEST(ASN1Test, Integer) {
+  bssl::UniquePtr<BIGNUM> int64_min = BIGNUMPow2(63);
+  ASSERT_TRUE(int64_min);
+  BN_set_negative(int64_min.get(), 1);
+
+  bssl::UniquePtr<BIGNUM> int64_max = BIGNUMPow2(63);
+  ASSERT_TRUE(int64_max);
+  ASSERT_TRUE(BN_sub_word(int64_max.get(), 1));
+
+  bssl::UniquePtr<BIGNUM> int32_min = BIGNUMPow2(31);
+  ASSERT_TRUE(int32_min);
+  BN_set_negative(int32_min.get(), 1);
+
+  bssl::UniquePtr<BIGNUM> int32_max = BIGNUMPow2(31);
+  ASSERT_TRUE(int32_max);
+  ASSERT_TRUE(BN_sub_word(int32_max.get(), 1));
+
+  struct {
+    // der is the DER encoding of the INTEGER, including the tag and length.
+    std::vector<uint8_t> der;
+    // type and data are the corresponding fields of the |ASN1_STRING|
+    // representation.
+    int type;
+    std::vector<uint8_t> data;
+    // bn_asc is the |BIGNUM| representation, as parsed by the |BN_asc2bn|
+    // function.
+    const char *bn_asc;
+  } kTests[] = {
+      // -2^64 - 1
+      {{0x02, 0x09, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+       V_ASN1_NEG_INTEGER,
+       {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
+       "-0x10000000000000001"},
+      // -2^64
+      {{0x02, 0x09, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+       V_ASN1_NEG_INTEGER,
+       {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+       "-0x10000000000000000"},
+      // -2^64 + 1
+      {{0x02, 0x09, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
+       V_ASN1_NEG_INTEGER,
+       {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+       "-0xffffffffffffffff"},
+      // -2^63 - 1
+      {{0x02, 0x09, 0xff, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+       V_ASN1_NEG_INTEGER,
+       {0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
+       "-0x8000000000000001"},
+      // -2^63 (INT64_MIN)
+      {{0x02, 0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+       V_ASN1_NEG_INTEGER,
+       {0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+       "-0x8000000000000000"},
+      // -2^63 + 1
+      {{0x02, 0x08, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
+       V_ASN1_NEG_INTEGER,
+       {0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+       "-0x7fffffffffffffff"},
+      // -2^32 - 1
+      {{0x02, 0x05, 0xfe, 0xff, 0xff, 0xff, 0xff},
+       V_ASN1_NEG_INTEGER,
+       {0x01, 0x00, 0x00, 0x00, 0x01},
+       "-0x100000001"},
+      // -2^32
+      {{0x02, 0x05, 0xff, 0x00, 0x00, 0x00, 0x00},
+       V_ASN1_NEG_INTEGER,
+       {0x01, 0x00, 0x00, 0x00, 0x00},
+       "-0x100000000"},
+      // -2^32 + 1
+      {{0x02, 0x05, 0xff, 0x00, 0x00, 0x00, 0x01},
+       V_ASN1_NEG_INTEGER,
+       {0xff, 0xff, 0xff, 0xff},
+       "-0xffffffff"},
+      // -2^31 - 1
+      {{0x02, 0x05, 0xff, 0x7f, 0xff, 0xff, 0xff},
+       V_ASN1_NEG_INTEGER,
+       {0x80, 0x00, 0x00, 0x01},
+       "-0x80000001"},
+      // -2^31 (INT32_MIN)
+      {{0x02, 0x04, 0x80, 0x00, 0x00, 0x00},
+       V_ASN1_NEG_INTEGER,
+       {0x80, 0x00, 0x00, 0x00},
+       "-0x80000000"},
+      // -2^31 + 1
+      {{0x02, 0x04, 0x80, 0x00, 0x00, 0x01},
+       V_ASN1_NEG_INTEGER,
+       {0x7f, 0xff, 0xff, 0xff},
+       "-0x7fffffff"},
+      // -257
+      {{0x02, 0x02, 0xfe, 0xff}, V_ASN1_NEG_INTEGER, {0x01, 0x01}, "-257"},
+      // -256
+      {{0x02, 0x02, 0xff, 0x00}, V_ASN1_NEG_INTEGER, {0x01, 0x00}, "-256"},
+      // -255
+      {{0x02, 0x02, 0xff, 0x01}, V_ASN1_NEG_INTEGER, {0xff}, "-255"},
+      // -129
+      {{0x02, 0x02, 0xff, 0x7f}, V_ASN1_NEG_INTEGER, {0x81}, "-129"},
+      // -128
+      {{0x02, 0x01, 0x80}, V_ASN1_NEG_INTEGER, {0x80}, "-128"},
+      // -127
+      {{0x02, 0x01, 0x81}, V_ASN1_NEG_INTEGER, {0x7f}, "-127"},
+      // -1
+      {{0x02, 0x01, 0xff}, V_ASN1_NEG_INTEGER, {0x01}, "-1"},
+      // 0
+      {{0x02, 0x01, 0x00}, V_ASN1_INTEGER, {}, "0"},
+      // 1
+      {{0x02, 0x01, 0x01}, V_ASN1_INTEGER, {0x01}, "1"},
+      // 127
+      {{0x02, 0x01, 0x7f}, V_ASN1_INTEGER, {0x7f}, "127"},
+      // 128
+      {{0x02, 0x02, 0x00, 0x80}, V_ASN1_INTEGER, {0x80}, "128"},
+      // 129
+      {{0x02, 0x02, 0x00, 0x81}, V_ASN1_INTEGER, {0x81}, "129"},
+      // 255
+      {{0x02, 0x02, 0x00, 0xff}, V_ASN1_INTEGER, {0xff}, "255"},
+      // 256
+      {{0x02, 0x02, 0x01, 0x00}, V_ASN1_INTEGER, {0x01, 0x00}, "256"},
+      // 257
+      {{0x02, 0x02, 0x01, 0x01}, V_ASN1_INTEGER, {0x01, 0x01}, "257"},
+      // 2^31 - 2
+      {{0x02, 0x04, 0x7f, 0xff, 0xff, 0xfe},
+       V_ASN1_INTEGER,
+       {0x7f, 0xff, 0xff, 0xfe},
+       "0x7ffffffe"},
+      // 2^31 - 1 (INT32_MAX)
+      {{0x02, 0x04, 0x7f, 0xff, 0xff, 0xff},
+       V_ASN1_INTEGER,
+       {0x7f, 0xff, 0xff, 0xff},
+       "0x7fffffff"},
+      // 2^31
+      {{0x02, 0x05, 0x00, 0x80, 0x00, 0x00, 0x00},
+       V_ASN1_INTEGER,
+       {0x80, 0x00, 0x00, 0x00},
+       "0x80000000"},
+      // 2^32 - 2
+      {{0x02, 0x05, 0x00, 0xff, 0xff, 0xff, 0xfe},
+       V_ASN1_INTEGER,
+       {0xff, 0xff, 0xff, 0xfe},
+       "0xfffffffe"},
+      // 2^32 - 1 (UINT32_MAX)
+      {{0x02, 0x05, 0x00, 0xff, 0xff, 0xff, 0xff},
+       V_ASN1_INTEGER,
+       {0xff, 0xff, 0xff, 0xff},
+       "0xffffffff"},
+      // 2^32
+      {{0x02, 0x05, 0x01, 0x00, 0x00, 0x00, 0x00},
+       V_ASN1_INTEGER,
+       {0x01, 0x00, 0x00, 0x00, 0x00},
+       "0x100000000"},
+      // 2^63 - 2
+      {{0x02, 0x08, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe},
+       V_ASN1_INTEGER,
+       {0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe},
+       "0x7ffffffffffffffe"},
+      // 2^63 - 1 (INT64_MAX)
+      {{0x02, 0x08, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+       V_ASN1_INTEGER,
+       {0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+       "0x7fffffffffffffff"},
+      // 2^63
+      {{0x02, 0x09, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+       V_ASN1_INTEGER,
+       {0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+       "0x8000000000000000"},
+      // 2^64 - 2
+      {{0x02, 0x09, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe},
+       V_ASN1_INTEGER,
+       {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe},
+       "0xfffffffffffffffe"},
+      // 2^64 - 1 (UINT64_MAX)
+      {{0x02, 0x09, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+       V_ASN1_INTEGER,
+       {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+       "0xffffffffffffffff"},
+      // 2^64
+      {{0x02, 0x09, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+       V_ASN1_INTEGER,
+       {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+       "0x10000000000000000"},
+      // 2^64 + 1
+      {{0x02, 0x09, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
+       V_ASN1_INTEGER,
+       {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
+       "0x10000000000000001"},
+  };
+
+  for (const auto &t : kTests) {
+    SCOPED_TRACE(t.bn_asc);
+    // Collect a map of different ways to construct the integer. The key is the
+    // method used and is only retained to aid debugging.
+    std::map<std::string, bssl::UniquePtr<ASN1_INTEGER>> objs;
+
+    // Construct |ASN1_INTEGER| by setting the type and data manually.
+    bssl::UniquePtr<ASN1_INTEGER> by_data(ASN1_STRING_type_new(t.type));
+    ASSERT_TRUE(by_data);
+    ASSERT_TRUE(ASN1_STRING_set(by_data.get(), t.data.data(), t.data.size()));
+    objs["data"] = std::move(by_data);
+
+    // Construct |ASN1_INTEGER| from a |BIGNUM|.
+    BIGNUM *bn_raw = nullptr;
+    ASSERT_TRUE(BN_asc2bn(&bn_raw, t.bn_asc));
+    bssl::UniquePtr<BIGNUM> bn(bn_raw);
+    bssl::UniquePtr<ASN1_INTEGER> by_bn(BN_to_ASN1_INTEGER(bn.get(), nullptr));
+    ASSERT_TRUE(by_bn);
+    objs["bn"] = std::move(by_bn);
+
+    // Construct |ASN1_INTEGER| from decoding.
+    const uint8_t *ptr = t.der.data();
+    bssl::UniquePtr<ASN1_INTEGER> by_der(
+        d2i_ASN1_INTEGER(nullptr, &ptr, t.der.size()));
+    ASSERT_TRUE(by_der);
+    EXPECT_EQ(ptr, t.der.data() + t.der.size());
+    objs["der"] = std::move(by_der);
+
+    // Construct |ASN1_INTEGER| from |long| or |uint64_t|, if it fits.
+    bool fits_in_long = false, fits_in_u64 = false;
+    uint64_t u64 = 0;
+    long l = 0;
+    uint64_t abs_u64;
+    if (BN_get_u64(bn.get(), &abs_u64)) {
+      fits_in_u64 = !BN_is_negative(bn.get());
+      if (fits_in_u64) {
+        u64 = abs_u64;
+        bssl::UniquePtr<ASN1_INTEGER> by_u64(ASN1_INTEGER_new());
+        ASSERT_TRUE(by_u64);
+        ASSERT_TRUE(ASN1_INTEGER_set_uint64(by_u64.get(), u64));
+        objs["u64"] = std::move(by_u64);
+      }
+
+      if (sizeof(long) == 8) {
+        fits_in_long = BN_cmp(int64_min.get(), bn.get()) <= 0 &&
+                       BN_cmp(bn.get(), int64_max.get()) <= 0;
+      } else {
+        ASSERT_EQ(4u, sizeof(long));
+        fits_in_long = BN_cmp(int32_min.get(), bn.get()) <= 0 &&
+                       BN_cmp(bn.get(), int32_max.get()) <= 0;
+      }
+      if (fits_in_long) {
+        if (BN_is_negative(bn.get())) {
+          l = static_cast<long>(0u - abs_u64);
+        } else {
+          l = static_cast<long>(abs_u64);
+        }
+        bssl::UniquePtr<ASN1_INTEGER> by_long(ASN1_INTEGER_new());
+        ASSERT_TRUE(by_long);
+        ASSERT_TRUE(ASN1_INTEGER_set(by_long.get(), l));
+        objs["long"] = std::move(by_long);
+      }
+    }
+
+    // Default construction should return the zero |ASN1_INTEGER|.
+    if (BN_is_zero(bn.get())) {
+      bssl::UniquePtr<ASN1_INTEGER> by_default(ASN1_INTEGER_new());
+      ASSERT_TRUE(by_default);
+      objs["default"] = std::move(by_default);
+    }
+
+    // Test that every |ASN1_INTEGER| constructed behaves as expected.
+    for (const auto &pair : objs) {
+      // The fields should be as expected.
+      SCOPED_TRACE(pair.first);
+      const ASN1_INTEGER *obj = pair.second.get();
+      EXPECT_EQ(t.type, ASN1_STRING_type(obj));
+      EXPECT_EQ(Bytes(t.data), Bytes(ASN1_STRING_get0_data(obj),
+                                     ASN1_STRING_length(obj)));
+
+      // The object should encode correctly.
+      TestSerialize(obj, i2d_ASN1_INTEGER, t.der);
+
+      bssl::UniquePtr<BIGNUM> bn2(ASN1_INTEGER_to_BN(obj, nullptr));
+      ASSERT_TRUE(bn2);
+      EXPECT_EQ(0, BN_cmp(bn.get(), bn2.get()));
+
+      if (fits_in_u64) {
+        uint64_t v;
+        ASSERT_TRUE(ASN1_INTEGER_get_uint64(&v, obj));
+        EXPECT_EQ(v, u64);
+      } else {
+        uint64_t v;
+        EXPECT_FALSE(ASN1_INTEGER_get_uint64(&v, obj));
+      }
+
+      if (fits_in_long) {
+        EXPECT_EQ(l, ASN1_INTEGER_get(obj));
+      } else {
+        EXPECT_EQ(-1, ASN1_INTEGER_get(obj));
+      }
+
+      // All variations of integers should compare as equal to each other, as
+      // strings or integers. (Functions like |ASN1_TYPE_cmp| rely on
+      // string-based comparison.)
+      for (const auto &pair2 : objs) {
+        SCOPED_TRACE(pair2.first);
+        EXPECT_EQ(0, ASN1_INTEGER_cmp(obj, pair2.second.get()));
+        EXPECT_EQ(0, ASN1_STRING_cmp(obj, pair2.second.get()));
+      }
+    }
+
+    // Although our parsers will never output non-minimal |ASN1_INTEGER|s, it is
+    // possible to construct them manually. They should encode correctly.
+    std::vector<uint8_t> data = t.data;
+    const int kMaxExtraBytes = 5;
+    for (int i = 0; i < kMaxExtraBytes; i++) {
+      data.insert(data.begin(), 0x00);
+      SCOPED_TRACE(Bytes(data));
+
+      bssl::UniquePtr<ASN1_INTEGER> non_minimal(ASN1_STRING_type_new(t.type));
+      ASSERT_TRUE(non_minimal);
+      ASSERT_TRUE(ASN1_STRING_set(non_minimal.get(), data.data(), data.size()));
+
+      TestSerialize(non_minimal.get(), i2d_ASN1_INTEGER, t.der);
+    }
+  }
+
+  for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kTests); i++) {
+    SCOPED_TRACE(Bytes(kTests[i].der));
+    const uint8_t *ptr = kTests[i].der.data();
+    bssl::UniquePtr<ASN1_INTEGER> a(
+        d2i_ASN1_INTEGER(nullptr, &ptr, kTests[i].der.size()));
+    ASSERT_TRUE(a);
+    for (size_t j = 0; j < OPENSSL_ARRAY_SIZE(kTests); j++) {
+      SCOPED_TRACE(Bytes(kTests[j].der));
+      ptr = kTests[j].der.data();
+      bssl::UniquePtr<ASN1_INTEGER> b(
+          d2i_ASN1_INTEGER(nullptr, &ptr, kTests[j].der.size()));
+      ASSERT_TRUE(b);
+
+      // |ASN1_INTEGER_cmp| should compare numerically. |ASN1_STRING_cmp| does
+      // not, but it should still preserve equality.
+      if (i < j) {
+        EXPECT_LT(ASN1_INTEGER_cmp(a.get(), b.get()), 0);
+        EXPECT_NE(ASN1_STRING_cmp(a.get(), b.get()), 0);
+      } else if (i > j) {
+        EXPECT_GT(ASN1_INTEGER_cmp(a.get(), b.get()), 0);
+        EXPECT_NE(ASN1_STRING_cmp(a.get(), b.get()), 0);
+      } else {
+        EXPECT_EQ(ASN1_INTEGER_cmp(a.get(), b.get()), 0);
+        EXPECT_EQ(ASN1_STRING_cmp(a.get(), b.get()), 0);
+      }
+    }
+  }
+
+  std::vector<uint8_t> kInvalidTests[] = {
+      // The empty string is not an integer.
+      {0x02, 0x00},
+      // Integers must be minimally-encoded.
+      {0x02, 0x02, 0x00, 0x00},
+      {0x02, 0x02, 0x00, 0x7f},
+      {0x02, 0x02, 0xff, 0xff},
+      {0x02, 0x02, 0xff, 0x80},
+  };
+  for (const auto &invalid : kInvalidTests) {
+    SCOPED_TRACE(Bytes(invalid));
+
+    const uint8_t *ptr = invalid.data();
+    bssl::UniquePtr<ASN1_INTEGER> integer(
+        d2i_ASN1_INTEGER(nullptr, &ptr, invalid.size()));
+    EXPECT_FALSE(integer);
+  }
+
+  // Callers expect |ASN1_INTEGER_get| and |ASN1_ENUMERATED_get| to return zero
+  // given NULL.
+  EXPECT_EQ(0, ASN1_INTEGER_get(nullptr));
+  EXPECT_EQ(0, ASN1_ENUMERATED_get(nullptr));
+}
+
+// Although invalid, a negative zero should encode correctly.
+TEST(ASN1Test, NegativeZero) {
+  bssl::UniquePtr<ASN1_INTEGER> neg_zero(
+      ASN1_STRING_type_new(V_ASN1_NEG_INTEGER));
+  ASSERT_TRUE(neg_zero);
+  EXPECT_EQ(0, ASN1_INTEGER_get(neg_zero.get()));
+  // It must serialize exactly like an ordinary zero INTEGER.
+  static const uint8_t kDER[] = {0x02, 0x01, 0x00};
+  TestSerialize(neg_zero.get(), i2d_ASN1_INTEGER, kDER);
+}
+
 TEST(ASN1Test, SerializeObject) {
   static const uint8_t kDER[] = {0x06, 0x09, 0x2a, 0x86, 0x48, 0x86,
                                  0xf7, 0x0d, 0x01, 0x01, 0x01};
@@ -1700,7 +2059,7 @@
             std::string(reinterpret_cast<const char *>(bio_data), bio_len));
 }
 
-TEST(ASN1, GetObject) {
+TEST(ASN1Test, GetObject) {
   // The header is valid, but there are not enough bytes for the length.
   static const uint8_t kTruncated[] = {0x30, 0x01};
   const uint8_t *ptr = kTruncated;
diff --git a/src/crypto/asn1/tasn_utl.c b/src/crypto/asn1/tasn_utl.c
index 9b1da0b..0b6048c 100644
--- a/src/crypto/asn1/tasn_utl.c
+++ b/src/crypto/asn1/tasn_utl.c
@@ -223,7 +223,6 @@
                                  int nullerr) {
   const ASN1_ADB *adb;
   const ASN1_ADB_TABLE *atbl;
-  long selector;
   ASN1_VALUE **sfld;
   int i;
   if (!(tt->flags & ASN1_TFLG_ADB_MASK)) {
@@ -244,14 +243,11 @@
     return adb->null_tt;
   }
 
-  /* Convert type to a long:
+  /* Convert type to a NID:
    * NB: don't check for NID_undef here because it
    * might be a legitimate value in the table */
-  if (tt->flags & ASN1_TFLG_ADB_OID) {
-    selector = OBJ_obj2nid((ASN1_OBJECT *)*sfld);
-  } else {
-    selector = ASN1_INTEGER_get((ASN1_INTEGER *)*sfld);
-  }
+  assert(tt->flags & ASN1_TFLG_ADB_OID);
+  int selector = OBJ_obj2nid((ASN1_OBJECT *)*sfld);
 
   /* Try to find matching entry in table Maybe should check application types
    * first to allow application override? Might also be useful to have a flag
diff --git a/src/crypto/bio/printf.c b/src/crypto/bio/printf.c
index 4f9d8a1..253546b 100644
--- a/src/crypto/bio/printf.c
+++ b/src/crypto/bio/printf.c
@@ -71,18 +71,6 @@
   va_start(args, format);
   out_len = vsnprintf(buf, sizeof(buf), format, args);
   va_end(args);
-
-#if defined(OPENSSL_WINDOWS)
-  // On Windows, vsnprintf returns -1 rather than the requested length on
-  // truncation
-  if (out_len < 0) {
-    va_start(args, format);
-    out_len = _vscprintf(format, args);
-    va_end(args);
-    assert(out_len >= (int)sizeof(buf));
-  }
-#endif
-
   if (out_len < 0) {
     return -1;
   }
diff --git a/src/crypto/bytestring/ber.c b/src/crypto/bytestring/ber.c
index e7f67dd..d9b780f 100644
--- a/src/crypto/bytestring/ber.c
+++ b/src/crypto/bytestring/ber.c
@@ -69,9 +69,9 @@
     CBS contents;
     unsigned tag;
     size_t header_len;
-
+    int indefinite;
     if (!CBS_get_any_ber_asn1_element(&in, &contents, &tag, &header_len,
-                                      ber_found)) {
+                                      ber_found, &indefinite)) {
       return 0;
     }
     if (*ber_found) {
@@ -119,11 +119,11 @@
     CBS contents;
     unsigned tag, child_string_tag = string_tag;
     size_t header_len;
-    int ber_found;
+    int indefinite;
     CBB *out_contents, out_contents_storage;
 
     if (!CBS_get_any_ber_asn1_element(in, &contents, &tag, &header_len,
-                                      &ber_found)) {
+                                      /*out_ber_found=*/NULL, &indefinite)) {
       return 0;
     }
 
@@ -153,11 +153,9 @@
       out_contents = &out_contents_storage;
     }
 
-    if (CBS_len(&contents) == header_len && header_len > 0 &&
-        CBS_data(&contents)[header_len - 1] == 0x80) {
-      // This is an indefinite length element.
+    if (indefinite) {
       if (!cbs_convert_ber(in, out_contents, child_string_tag,
-                           1 /* looking for eoc */, depth + 1) ||
+                           /*looking_for_eoc=*/1, depth + 1) ||
           !CBB_flush(out)) {
         return 0;
       }
@@ -171,7 +169,7 @@
     if (tag & CBS_ASN1_CONSTRUCTED) {
       // Recurse into children.
       if (!cbs_convert_ber(&contents, out_contents, child_string_tag,
-                           0 /* not looking for eoc */, depth + 1)) {
+                           /*looking_for_eoc=*/0, depth + 1)) {
         return 0;
       }
     } else {
diff --git a/src/crypto/bytestring/bytestring_test.cc b/src/crypto/bytestring/bytestring_test.cc
index 985e38c..77261a3 100644
--- a/src/crypto/bytestring/bytestring_test.cc
+++ b/src/crypto/bytestring/bytestring_test.cc
@@ -693,29 +693,38 @@
   const char *in_hex;
   bool ok;
   bool ber_found;
+  bool indefinite;
   unsigned tag;
 };
 
 static const BERTest kBERTests[] = {
-  // Trivial cases, also valid DER.
-  {"0000", true, false, 0},
-  {"0100", true, false, 1},
-  {"020101", true, false, 2},
+    // Trivial cases, also valid DER.
+    {"0000", true, false, false, 0},
+    {"0100", true, false, false, 1},
+    {"020101", true, false, false, 2},
 
-  // Non-minimally encoded lengths.
-  {"02810101", true, true, 2},
-  {"0282000101", true, true, 2},
-  {"028300000101", true, true, 2},
-  {"02840000000101", true, true, 2},
-  // Technically valid BER, but not handled.
-  {"02850000000101", false, false, 0},
+    // Non-minimally encoded lengths.
+    {"02810101", true, true, false, 2},
+    {"0282000101", true, true, false, 2},
+    {"028300000101", true, true, false, 2},
+    {"02840000000101", true, true, false, 2},
+    // Technically valid BER, but not handled.
+    {"02850000000101", false, false, false, 0},
 
-  {"0280", false, false, 0},  // Indefinite length, but not constructed.
-  {"2280", true, true, CBS_ASN1_CONSTRUCTED | 2},  // Indefinite length.
-  {"3f0000", false, false, 0},  // Invalid extended tag zero (X.690 8.1.2.4.2.c)
-  {"1f0100", false, false, 0},  // Should be a low-number tag form, even in BER.
-  {"1f4000", true, false, 0x40},
-  {"1f804000", false, false, 0},  // Non-minimal tags are invalid, even in BER.
+    // Indefinite length, but not constructed.
+    {"0280", false, false, false, 0},
+    // Indefinite length.
+    {"2280", true, true, true, CBS_ASN1_CONSTRUCTED | 2},
+    // Indefinite length with multi-byte tag.
+    {"bf1f80", true, true, true,
+     CBS_ASN1_CONSTRUCTED | CBS_ASN1_CONTEXT_SPECIFIC | 31},
+    // Invalid extended tag zero (X.690 8.1.2.4.2.c)
+    {"3f0000", false, false, false, 0},
+    // Should be a low-number tag form, even in BER.
+    {"1f0100", false, false, false, 0},
+    {"1f4000", true, false, false, 0x40},
+    // Non-minimal tags are invalid, even in BER.
+    {"1f804000", false, false, false, 0},
 };
 
 TEST(CBSTest, BERElementTest) {
@@ -729,14 +738,16 @@
     unsigned tag;
     size_t header_len;
     int ber_found;
-    int ok =
-        CBS_get_any_ber_asn1_element(&in, &out, &tag, &header_len, &ber_found);
+    int indefinite;
+    int ok = CBS_get_any_ber_asn1_element(&in, &out, &tag, &header_len,
+                                          &ber_found, &indefinite);
     ASSERT_TRUE((ok == 1) == test.ok);
     if (!test.ok) {
       continue;
     }
 
-    EXPECT_TRUE((ber_found == 1) == test.ber_found);
+    EXPECT_EQ(test.ber_found ? 1 : 0, ber_found);
+    EXPECT_EQ(test.indefinite ? 1 : 0, indefinite);
     EXPECT_LE(header_len, in_bytes.size());
     EXPECT_EQ(CBS_len(&out), in_bytes.size());
     EXPECT_EQ(CBS_len(&in), 0u);
diff --git a/src/crypto/bytestring/cbs.c b/src/crypto/bytestring/cbs.c
index 803c97a..293e66c 100644
--- a/src/crypto/bytestring/cbs.c
+++ b/src/crypto/bytestring/cbs.c
@@ -285,7 +285,7 @@
 
 static int cbs_get_any_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
                                     size_t *out_header_len, int *out_ber_found,
-                                    int ber_ok) {
+                                    int *out_indefinite, int ber_ok) {
   CBS header = *cbs;
   CBS throwaway;
 
@@ -294,6 +294,10 @@
   }
   if (ber_ok) {
     *out_ber_found = 0;
+    *out_indefinite = 0;
+  } else {
+    assert(out_ber_found == NULL);
+    assert(out_indefinite == NULL);
   }
 
   unsigned tag;
@@ -333,6 +337,7 @@
         *out_header_len = header_len;
       }
       *out_ber_found = 1;
+      *out_indefinite = 1;
       return CBS_get_bytes(cbs, out, header_len);
     }
 
@@ -395,16 +400,18 @@
 
 int CBS_get_any_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
                                     size_t *out_header_len) {
-  return cbs_get_any_asn1_element(cbs, out, out_tag, out_header_len,
-                                  NULL, 0 /* DER only */);
+  return cbs_get_any_asn1_element(cbs, out, out_tag, out_header_len, NULL, NULL,
+                                  /*ber_ok=*/0);
 }
 
 int CBS_get_any_ber_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
-                                 size_t *out_header_len, int *out_ber_found) {
+                                 size_t *out_header_len, int *out_ber_found,
+                                 int *out_indefinite) {
   int ber_found_temp;
   return cbs_get_any_asn1_element(
       cbs, out, out_tag, out_header_len,
-      out_ber_found ? out_ber_found : &ber_found_temp, 1 /* BER allowed */);
+      out_ber_found ? out_ber_found : &ber_found_temp, out_indefinite,
+      /*ber_ok=*/1);
 }
 
 static int cbs_get_asn1(CBS *cbs, CBS *out, unsigned tag_value,
diff --git a/src/crypto/crypto_test.cc b/src/crypto/crypto_test.cc
index 7f15a23..caccba5 100644
--- a/src/crypto/crypto_test.cc
+++ b/src/crypto/crypto_test.cc
@@ -138,3 +138,23 @@
 }
 
 #endif  // BORINGSSL_FIPS_COUNTERS
+
+TEST(Crypto, QueryAlgorithmStatus) {
+#if defined(BORINGSSL_FIPS)
+  const bool is_fips_build = true;
+#else
+  const bool is_fips_build = false;
+#endif
+  // Approved algorithm names report success only in FIPS builds.
+  EXPECT_EQ(FIPS_query_algorithm_status("AES-GCM"), is_fips_build);
+  EXPECT_EQ(FIPS_query_algorithm_status("AES-ECB"), is_fips_build);
+  // Unknown or empty names are never approved, whatever the build.
+  EXPECT_FALSE(FIPS_query_algorithm_status("FakeEncrypt"));
+  EXPECT_FALSE(FIPS_query_algorithm_status(""));
+}
+
+#if defined(BORINGSSL_FIPS) && !defined(OPENSSL_ASAN)
+TEST(Crypto, OnDemandIntegrityTest) {
+  EXPECT_TRUE(BORINGSSL_integrity_test());  // Zero means a hash mismatch.
+}
+#endif
diff --git a/src/crypto/curve25519/curve25519.c b/src/crypto/curve25519/curve25519.c
index 7cb0add..e316acd 100644
--- a/src/crypto/curve25519/curve25519.c
+++ b/src/crypto/curve25519/curve25519.c
@@ -36,6 +36,10 @@
 // Various pre-computed constants.
 #include "./curve25519_tables.h"
 
+#if defined(OPENSSL_NO_ASM)
+#define FIAT_25519_NO_ASM
+#endif
+
 #if defined(BORINGSSL_CURVE25519_64BIT)
 #include "../../third_party/fiat/curve25519_64.h"
 #else
diff --git a/src/crypto/err/asn1.errordata b/src/crypto/err/asn1.errordata
index 9344621..8ba7cf6 100644
--- a/src/crypto/err/asn1.errordata
+++ b/src/crypto/err/asn1.errordata
@@ -44,6 +44,7 @@
 ASN1,194,INVALID_BIT_STRING_PADDING
 ASN1,142,INVALID_BMPSTRING
 ASN1,143,INVALID_DIGIT
+ASN1,196,INVALID_INTEGER
 ASN1,144,INVALID_MODIFIER
 ASN1,145,INVALID_NUMBER
 ASN1,146,INVALID_OBJECT_ENCODING
@@ -90,6 +91,7 @@
 ASN1,186,UNSUPPORTED_ANY_DEFINED_BY_TYPE
 ASN1,187,UNSUPPORTED_PUBLIC_KEY_TYPE
 ASN1,188,UNSUPPORTED_TYPE
+ASN1,195,WRONG_INTEGER_TYPE
 ASN1,189,WRONG_PUBLIC_KEY_TYPE
 ASN1,190,WRONG_TAG
 ASN1,191,WRONG_TYPE
diff --git a/src/crypto/fipsmodule/bcm.c b/src/crypto/fipsmodule/bcm.c
index 1219bc7..faff6c4 100644
--- a/src/crypto/fipsmodule/bcm.c
+++ b/src/crypto/fipsmodule/bcm.c
@@ -169,6 +169,23 @@
 #if !defined(OPENSSL_ASAN)
   // Integrity tests cannot run under ASAN because it involves reading the full
   // .text section, which triggers the global-buffer overflow detection.
+  if (!BORINGSSL_integrity_test()) {
+    goto err;
+  }
+#endif  // OPENSSL_ASAN
+
+  if (!boringssl_self_test_startup()) {
+    goto err;
+  }
+
+  return;
+
+err:
+  BORINGSSL_FIPS_abort();
+}
+
+#if !defined(OPENSSL_ASAN)
+int BORINGSSL_integrity_test(void) {
   const uint8_t *const start = BORINGSSL_bcm_text_start;
   const uint8_t *const end = BORINGSSL_bcm_text_end;
 
@@ -198,14 +215,14 @@
   const EVP_MD *const kHashFunction = EVP_sha256();
   if (!boringssl_self_test_sha256() ||
       !boringssl_self_test_hmac_sha256()) {
-    goto err;
+    return 0;
   }
 #else
   uint8_t result[SHA512_DIGEST_LENGTH];
   const EVP_MD *const kHashFunction = EVP_sha512();
   if (!boringssl_self_test_sha512() ||
       !boringssl_self_test_hmac_sha256()) {
-    goto err;
+    return 0;
   }
 #endif
 
@@ -216,7 +233,7 @@
   if (!HMAC_Init_ex(&hmac_ctx, kHMACKey, sizeof(kHMACKey), kHashFunction,
                     NULL /* no ENGINE */)) {
     fprintf(stderr, "HMAC_Init_ex failed.\n");
-    goto err;
+    return 0;
   }
 
   BORINGSSL_maybe_set_module_text_permissions(PROT_READ | PROT_EXEC);
@@ -236,7 +253,7 @@
   if (!HMAC_Final(&hmac_ctx, result, &result_len) ||
       result_len != sizeof(result)) {
     fprintf(stderr, "HMAC failed.\n");
-    goto err;
+    return 0;
   }
   HMAC_CTX_cleanse(&hmac_ctx); // FIPS 140-3, AS05.10.
 
@@ -244,22 +261,14 @@
 
   if (!check_test(expected, result, sizeof(result), "FIPS integrity test")) {
 #if !defined(BORINGSSL_FIPS_BREAK_TESTS)
-    goto err;
+    return 0;
 #endif
   }
 
   OPENSSL_cleanse(result, sizeof(result)); // FIPS 140-3, AS05.10.
-#endif  // OPENSSL_ASAN
-
-  if (!boringssl_self_test_startup()) {
-    goto err;
-  }
-
-  return;
-
-err:
-  BORINGSSL_FIPS_abort();
+  return 1;
 }
+#endif  // OPENSSL_ASAN
 
 void BORINGSSL_FIPS_abort(void) {
   for (;;) {
diff --git a/src/crypto/fipsmodule/bn/bn_test.cc b/src/crypto/fipsmodule/bn/bn_test.cc
index 72ec8c2..7d57802 100644
--- a/src/crypto/fipsmodule/bn/bn_test.cc
+++ b/src/crypto/fipsmodule/bn/bn_test.cc
@@ -2712,6 +2712,22 @@
   EXPECT_FALSE(BN_is_negative(r.get()));
 }
 
+TEST_F(BNTest, ModSqrtInvalid) {
+  bssl::UniquePtr<BIGNUM> bn2140141 = ASCIIToBIGNUM("2140141");
+  ASSERT_TRUE(bn2140141);
+  bssl::UniquePtr<BIGNUM> bn2140142 = ASCIIToBIGNUM("2140142");
+  ASSERT_TRUE(bn2140142);
+  bssl::UniquePtr<BIGNUM> bn4588033 = ASCIIToBIGNUM("4588033");
+  ASSERT_TRUE(bn4588033);
+
+  // |BN_mod_sqrt| may fail or return an arbitrary value, so we do not use
+  // |TestModSqrt| or |TestNotModSquare|. We only promise it will not crash or
+  // loop forever. (For some invalid inputs, it may even be non-deterministic.)
+  // See CVE-2022-0778.
+  BN_free(BN_mod_sqrt(nullptr, bn2140141.get(), bn4588033.get(), ctx()));
+  BN_free(BN_mod_sqrt(nullptr, bn2140142.get(), bn4588033.get(), ctx()));
+}
+
 #if defined(OPENSSL_BN_ASM_MONT) && defined(SUPPORTS_ABI_TEST)
 TEST_F(BNTest, BNMulMontABI) {
   for (size_t words : {4, 5, 6, 7, 8, 16, 32}) {
diff --git a/src/crypto/fipsmodule/bn/sqrt.c b/src/crypto/fipsmodule/bn/sqrt.c
index db88829..9180d54 100644
--- a/src/crypto/fipsmodule/bn/sqrt.c
+++ b/src/crypto/fipsmodule/bn/sqrt.c
@@ -306,8 +306,7 @@
   }
 
   // x := a^((q-1)/2)
-  if (BN_is_zero(t))  // special case: p = 2^e + 1
-  {
+  if (BN_is_zero(t)) {  // special case: p = 2^e + 1
     if (!BN_nnmod(t, A, p, ctx)) {
       goto end;
     }
@@ -350,7 +349,6 @@
     // We have  a*b = x^2,
     //    y^2^(e-1) = -1,
     //    b^2^(e-1) = 1.
-
     if (BN_is_one(b)) {
       if (!BN_copy(ret, x)) {
         goto end;
@@ -359,23 +357,26 @@
       goto vrfy;
     }
 
-
-    // find smallest  i  such that  b^(2^i) = 1
-    i = 1;
-    if (!BN_mod_sqr(t, b, p, ctx)) {
+    // Find the smallest i, 0 < i < e, such that b^(2^i) = 1
+    for (i = 1; i < e; i++) {
+      if (i == 1) {
+        if (!BN_mod_sqr(t, b, p, ctx)) {
+          goto end;
+        }
+      } else {
+        if (!BN_mod_mul(t, t, t, p, ctx)) {
+          goto end;
+        }
+      }
+      if (BN_is_one(t)) {
+        break;
+      }
+    }
+    // If not found, a is not a square or p is not a prime.
+    if (i >= e) {
+      OPENSSL_PUT_ERROR(BN, BN_R_NOT_A_SQUARE);
       goto end;
     }
-    while (!BN_is_one(t)) {
-      i++;
-      if (i == e) {
-        OPENSSL_PUT_ERROR(BN, BN_R_NOT_A_SQUARE);
-        goto end;
-      }
-      if (!BN_mod_mul(t, t, t, p, ctx)) {
-        goto end;
-      }
-    }
-
 
     // t := y^2^(e - i - 1)
     if (!BN_copy(t, y)) {
@@ -391,14 +392,15 @@
         !BN_mod_mul(b, b, y, p, ctx)) {
       goto end;
     }
+
+    // e decreases each iteration, so this loop will terminate.
+    assert(i < e);
     e = i;
   }
 
 vrfy:
   if (!err) {
-    // verify the result -- the input might have been not a square
-    // (test added in 0.9.8)
-
+    // Verify the result. The input might not have been a square.
     if (!BN_mod_sqr(x, ret, p, ctx)) {
       err = 1;
     }
diff --git a/src/crypto/fipsmodule/ec/ec_key.c b/src/crypto/fipsmodule/ec/ec_key.c
index d7acf96..2d04d13 100644
--- a/src/crypto/fipsmodule/ec/ec_key.c
+++ b/src/crypto/fipsmodule/ec/ec_key.c
@@ -308,6 +308,9 @@
   }
 
   // Check the public and private keys match.
+  //
+  // NOTE: this is a FIPS pair-wise consistency check for the ECDH case. See SP
+  // 800-56Ar3, page 36.
   if (eckey->priv_key != NULL) {
     EC_RAW_POINT point;
     if (!ec_point_mul_scalar_base(eckey->group, &point,
diff --git a/src/crypto/fipsmodule/ec/p256.c b/src/crypto/fipsmodule/ec/p256.c
index 9f5694c..0d0e766 100644
--- a/src/crypto/fipsmodule/ec/p256.c
+++ b/src/crypto/fipsmodule/ec/p256.c
@@ -31,8 +31,10 @@
 #include "../delocate.h"
 #include "./internal.h"
 
+#if defined(OPENSSL_NO_ASM)
+#define FIAT_P256_NO_ASM
+#endif
 
-// MSVC does not implement uint128_t, and crashes with intrinsics
 #if defined(BORINGSSL_HAS_UINT128)
 #define BORINGSSL_NISTP256_64BIT 1
 #include "../../../third_party/fiat/p256_64.h"
diff --git a/src/crypto/fipsmodule/self_check/fips.c b/src/crypto/fipsmodule/self_check/fips.c
index d55c493..11c9309 100644
--- a/src/crypto/fipsmodule/self_check/fips.c
+++ b/src/crypto/fipsmodule/self_check/fips.c
@@ -28,6 +28,45 @@
 
 int FIPS_mode_set(int on) { return on == FIPS_mode(); }
 
+uint32_t FIPS_version(void) {
+  return 0;
+}
+
+int FIPS_query_algorithm_status(const char *algorithm) {
+#if defined(BORINGSSL_FIPS)
+  static const char kApprovedAlgorithms[][13] = {
+    "AES-CBC",
+    "AES-CCM",
+    "AES-CTR",
+    "AES-ECB",
+    "AES-GCM",
+    "AES-KW",
+    "AES-KWP",
+    "ctrDRBG",
+    "ECC-SSC",
+    "ECDSA-sign",
+    "ECDSA-verify",
+    "FFC-SSC",
+    "HMAC",
+    "RSA-sign",
+    "RSA-verify",
+    "SHA-1",
+    "SHA2-224",
+    "SHA2-256",
+    "SHA2-384",
+    "SHA2-512",
+    "SHA2-512/256",
+  };
+  for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kApprovedAlgorithms); i++) {
+    if (strcmp(algorithm, kApprovedAlgorithms[i]) == 0) {
+      return 1;
+    }
+  }
+#endif  // BORINGSSL_FIPS
+
+  return 0;
+}
+
 #if defined(BORINGSSL_FIPS_COUNTERS)
 
 size_t FIPS_read_counter(enum fips_counter_t counter) {
diff --git a/src/crypto/fipsmodule/self_check/self_check.c b/src/crypto/fipsmodule/self_check/self_check.c
index b7cd868..b248789 100644
--- a/src/crypto/fipsmodule/self_check/self_check.c
+++ b/src/crypto/fipsmodule/self_check/self_check.c
@@ -35,6 +35,7 @@
 #include "../ec/internal.h"
 #include "../ecdsa/internal.h"
 #include "../rand/internal.h"
+#include "../rsa/internal.h"
 #include "../tls/internal.h"
 
 
diff --git a/src/crypto/mem.c b/src/crypto/mem.c
index d3b2112..15d3436 100644
--- a/src/crypto/mem.c
+++ b/src/crypto/mem.c
@@ -180,11 +180,17 @@
 
   size_t size = *(size_t *)ptr;
   OPENSSL_cleanse(ptr, size + OPENSSL_MALLOC_PREFIX);
+
+// ASan knows to intercept malloc and free, but not sdallocx.
+#if defined(OPENSSL_ASAN)
+  free(ptr);
+#else
   if (sdallocx) {
     sdallocx(ptr, size + OPENSSL_MALLOC_PREFIX, 0 /* flags */);
   } else {
     free(ptr);
   }
+#endif
 }
 
 void *OPENSSL_realloc(void *orig_ptr, size_t new_size) {
diff --git a/src/crypto/poly1305/poly1305.c b/src/crypto/poly1305/poly1305.c
index 2eb3974..e4e6298 100644
--- a/src/crypto/poly1305/poly1305.c
+++ b/src/crypto/poly1305/poly1305.c
@@ -204,6 +204,11 @@
                             size_t in_len) {
   struct poly1305_state_st *state = poly1305_aligned_state(statep);
 
+  // Work around a C language bug. See https://crbug.com/1019588.
+  if (in_len == 0) {
+    return;
+  }
+
 #if defined(OPENSSL_POLY1305_NEON)
   if (CRYPTO_is_NEON_capable()) {
     CRYPTO_poly1305_update_neon(statep, in, in_len);
diff --git a/src/crypto/x509/internal.h b/src/crypto/x509/internal.h
index 702464a..ff8288f 100644
--- a/src/crypto/x509/internal.h
+++ b/src/crypto/x509/internal.h
@@ -106,13 +106,14 @@
   STACK_OF(ASN1_TYPE) *set;
 } /* X509_ATTRIBUTE */;
 
-struct x509_cert_aux_st {
+typedef struct x509_cert_aux_st {
   STACK_OF(ASN1_OBJECT) *trust;   // trusted uses
   STACK_OF(ASN1_OBJECT) *reject;  // rejected uses
   ASN1_UTF8STRING *alias;         // "friendly name"
   ASN1_OCTET_STRING *keyid;       // key id of private key
-  STACK_OF(X509_ALGOR) *other;    // other unspecified info
-} /* X509_CERT_AUX */;
+} X509_CERT_AUX;
+
+DECLARE_ASN1_FUNCTIONS(X509_CERT_AUX)
 
 struct X509_extension_st {
   ASN1_OBJECT *object;
@@ -155,7 +156,7 @@
   STACK_OF(DIST_POINT) *crldp;
   STACK_OF(GENERAL_NAME) *altname;
   NAME_CONSTRAINTS *nc;
-  unsigned char sha1_hash[SHA_DIGEST_LENGTH];
+  unsigned char cert_hash[SHA256_DIGEST_LENGTH];
   X509_CERT_AUX *aux;
   CRYPTO_BUFFER *buf;
   CRYPTO_MUTEX lock;
@@ -218,7 +219,7 @@
   // CRL and base CRL numbers for delta processing
   ASN1_INTEGER *crl_number;
   ASN1_INTEGER *base_crl_number;
-  unsigned char sha1_hash[SHA_DIGEST_LENGTH];
+  unsigned char crl_hash[SHA256_DIGEST_LENGTH];
   STACK_OF(GENERAL_NAMES) *issuers;
   const X509_CRL_METHOD *meth;
   void *meth_data;
@@ -370,6 +371,8 @@
 
 ASN1_TYPE *ASN1_generate_v3(const char *str, X509V3_CTX *cnf);
 
+int X509_CERT_AUX_print(BIO *bp, X509_CERT_AUX *x, int indent);
+
 
 /* RSA-PSS functions. */
 
diff --git a/src/crypto/x509/t_x509.c b/src/crypto/x509/t_x509.c
index 10a7cad..4f9d409 100644
--- a/src/crypto/x509/t_x509.c
+++ b/src/crypto/x509/t_x509.c
@@ -134,13 +134,12 @@
         }
 
         const ASN1_INTEGER *serial = X509_get0_serialNumber(x);
-        /* |ASN1_INTEGER_get| returns -1 on overflow, so this check skips
-         * negative and large serial numbers. */
-        l = ASN1_INTEGER_get(serial);
-        if (l >= 0) {
+        uint64_t serial_u64;
+        if (ASN1_INTEGER_get_uint64(&serial_u64, serial)) {
             assert(serial->type != V_ASN1_NEG_INTEGER);
-            if (BIO_printf(bp, " %ld (0x%lx)\n", l, (unsigned long)l) <= 0) {
-                goto err;
+            if (BIO_printf(bp, " %" PRIu64 " (0x%" PRIx64 ")\n", serial_u64,
+                           serial_u64) <= 0) {
+              goto err;
             }
         } else {
             neg = (serial->type == V_ASN1_NEG_INTEGER) ? " (Negative)" : "";
diff --git a/src/crypto/x509/x509_cmp.c b/src/crypto/x509/x509_cmp.c
index 5811f44..e9e1d8c 100644
--- a/src/crypto/x509/x509_cmp.c
+++ b/src/crypto/x509/x509_cmp.c
@@ -101,7 +101,7 @@
 
 int X509_CRL_match(const X509_CRL *a, const X509_CRL *b)
 {
-    return OPENSSL_memcmp(a->sha1_hash, b->sha1_hash, 20);
+    return OPENSSL_memcmp(a->crl_hash, b->crl_hash, SHA256_DIGEST_LENGTH);
 }
 
 X509_NAME *X509_get_issuer_name(const X509 *a)
@@ -154,7 +154,7 @@
  */
 int X509_cmp(const X509 *a, const X509 *b)
 {
-    /* Fill in the |sha1_hash| fields.
+    /* Fill in the |cert_hash| fields.
      *
      * TODO(davidben): This may fail, in which case the the hash will be all
      * zeros. This produces a consistent comparison (failures are sticky), but
@@ -165,7 +165,7 @@
     x509v3_cache_extensions((X509 *)a);
     x509v3_cache_extensions((X509 *)b);
 
-    int rv = OPENSSL_memcmp(a->sha1_hash, b->sha1_hash, SHA_DIGEST_LENGTH);
+    int rv = OPENSSL_memcmp(a->cert_hash, b->cert_hash, SHA256_DIGEST_LENGTH);
     if (rv)
         return rv;
     /* Check for match against stored encoding too */
diff --git a/src/crypto/x509/x509_test.cc b/src/crypto/x509/x509_test.cc
index 38414e9..b201afe 100644
--- a/src/crypto/x509/x509_test.cc
+++ b/src/crypto/x509/x509_test.cc
@@ -3524,6 +3524,20 @@
 -----END CERTIFICATE-----
 )";
 
+// kHighTagNumber is an X.509 certificate where the outermost SEQUENCE tag uses
+// high tag number form.
+static const char kHighTagNumber[] = R"(
+-----BEGIN CERTIFICATE-----
+PxCCASAwgcagAwIBAgICBNIwCgYIKoZIzj0EAwIwDzENMAsGA1UEAxMEVGVzdDAg
+Fw0wMDAxMDEwMDAwMDBaGA8yMTAwMDEwMTAwMDAwMFowDzENMAsGA1UEAxMEVGVz
+dDBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABOYraeK/ZZ+Xvi8eDZSKTNWXa7ep
+Hg1G+92pqR6d3LpaAefWl6gKGPnDxKMeVuJ8g0jbFhoc9R1+8ZQtS89yIsGjEDAO
+MAwGA1UdEwQFMAMBAf8wCgYIKoZIzj0EAwIDSQAwRgIhAKnSIhfmzfQpeOKFHiAq
+cml3ex6oaVVGoJWCsPQoZjVAAiEAqTHS9HzZBTQ20cMPXUpf8u5AXZP7adeh4qnk
+soBsxWI=
+-----END CERTIFICATE-----
+)";
+
 TEST(X509Test, BER) {
   // Constructed strings are forbidden in DER.
   EXPECT_FALSE(CertFromPEM(kConstructedBitString));
@@ -3532,6 +3546,9 @@
   EXPECT_FALSE(CertFromPEM(kIndefiniteLength));
   // Padding bits in BIT STRINGs must be zero in BER.
   EXPECT_FALSE(CertFromPEM(kNonZeroPadding));
+  // Tags must be minimal in both BER and DER, though many BER decoders
+  // incorrectly support non-minimal tags.
+  EXPECT_FALSE(CertFromPEM(kHighTagNumber));
 }
 
 TEST(X509Test, Names) {
@@ -3874,3 +3891,23 @@
 
   EXPECT_EQ(sk_X509_OBJECT_num(X509_STORE_get0_objects(store.get())), 2u);
 }
+
+TEST(X509Test, BytesToHex) {
+  struct {
+    std::vector<uint8_t> bytes;
+    const char *hex;
+  } kTests[] = {
+      {{}, ""},
+      {{0x00}, "00"},
+      {{0x00, 0x11, 0x22}, "00:11:22"},
+      {{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef},
+       "01:23:45:67:89:AB:CD:EF"},
+  };
+  for (const auto &t : kTests) {
+    SCOPED_TRACE(Bytes(t.bytes));
+    bssl::UniquePtr<char> hex(
+        x509v3_bytes_to_hex(t.bytes.data(), t.bytes.size()));
+    ASSERT_TRUE(hex);
+    EXPECT_STREQ(hex.get(), t.hex);
+  }
+}
diff --git a/src/crypto/x509/x509_trs.c b/src/crypto/x509/x509_trs.c
index c95d6fc..d21548d 100644
--- a/src/crypto/x509/x509_trs.c
+++ b/src/crypto/x509/x509_trs.c
@@ -71,7 +71,6 @@
 static int trust_compat(X509_TRUST *trust, X509 *x, int flags);
 
 static int obj_trust(int id, X509 *x, int flags);
-static int (*default_trust) (int id, X509 *x, int flags) = obj_trust;
 
 /*
  * WARNING: the following table should be kept in order of trust and without
@@ -106,14 +105,6 @@
     return (*a)->trust - (*b)->trust;
 }
 
-int (*X509_TRUST_set_default(int (*trust) (int, X509 *, int))) (int, X509 *,
-                                                                int) {
-    int (*oldtrust) (int, X509 *, int);
-    oldtrust = default_trust;
-    default_trust = trust;
-    return oldtrust;
-}
-
 int X509_check_trust(X509 *x, int id, int flags)
 {
     X509_TRUST *pt;
@@ -130,7 +121,7 @@
     }
     idx = X509_TRUST_get_by_id(id);
     if (idx == -1)
-        return default_trust(id, x, flags);
+        return obj_trust(id, x, flags);
     pt = X509_TRUST_get0(idx);
     return pt->check_trust(pt, x, flags);
 }
diff --git a/src/crypto/x509/x_crl.c b/src/crypto/x509/x_crl.c
index f010849..ab2a039 100644
--- a/src/crypto/x509/x_crl.c
+++ b/src/crypto/x509/x_crl.c
@@ -251,7 +251,7 @@
         break;
 
     case ASN1_OP_D2I_POST:
-        if (!X509_CRL_digest(crl, EVP_sha1(), crl->sha1_hash, NULL)) {
+        if (!X509_CRL_digest(crl, EVP_sha256(), crl->crl_hash, NULL)) {
             return 0;
         }
 
diff --git a/src/crypto/x509/x_x509a.c b/src/crypto/x509/x_x509a.c
index fca02a6..d0e921f 100644
--- a/src/crypto/x509/x_x509a.c
+++ b/src/crypto/x509/x_x509a.c
@@ -78,7 +78,6 @@
         ASN1_IMP_SEQUENCE_OF_OPT(X509_CERT_AUX, reject, ASN1_OBJECT, 0),
         ASN1_OPT(X509_CERT_AUX, alias, ASN1_UTF8STRING),
         ASN1_OPT(X509_CERT_AUX, keyid, ASN1_OCTET_STRING),
-        ASN1_IMP_SEQUENCE_OF_OPT(X509_CERT_AUX, other, X509_ALGOR, 1)
 } ASN1_SEQUENCE_END(X509_CERT_AUX)
 
 IMPLEMENT_ASN1_FUNCTIONS(X509_CERT_AUX)
@@ -95,6 +94,9 @@
 int X509_alias_set1(X509 *x, const unsigned char *name, int len)
 {
     X509_CERT_AUX *aux;
+    /* TODO(davidben): Empty aliases are not meaningful in PKCS#12, and the
+     * getters cannot quite represent them. Also erase the object if |len| is
+     * zero. */
     if (!name) {
         if (!x || !x->aux || !x->aux->alias)
             return 1;
@@ -112,6 +114,9 @@
 int X509_keyid_set1(X509 *x, const unsigned char *id, int len)
 {
     X509_CERT_AUX *aux;
+    /* TODO(davidben): Empty key IDs are not meaningful in PKCS#12, and the
+     * getters cannot quite represent them. Also erase the object if |len| is
+     * zero. */
     if (!id) {
         if (!x || !x->aux || !x->aux->keyid)
             return 1;
@@ -126,22 +131,22 @@
     return ASN1_STRING_set(aux->keyid, id, len);
 }
 
-unsigned char *X509_alias_get0(X509 *x, int *len)
+unsigned char *X509_alias_get0(X509 *x, int *out_len)
 {
-    if (!x->aux || !x->aux->alias)
-        return NULL;
-    if (len)
-        *len = x->aux->alias->length;
-    return x->aux->alias->data;
+    const ASN1_UTF8STRING *alias = x->aux != NULL ? x->aux->alias : NULL;
+    if (out_len != NULL) {
+        *out_len = alias != NULL ? alias->length : 0;
+    }
+    return alias != NULL ? alias->data : NULL;
 }
 
-unsigned char *X509_keyid_get0(X509 *x, int *len)
+unsigned char *X509_keyid_get0(X509 *x, int *out_len)
 {
-    if (!x->aux || !x->aux->keyid)
-        return NULL;
-    if (len)
-        *len = x->aux->keyid->length;
-    return x->aux->keyid->data;
+    const ASN1_OCTET_STRING *keyid = x->aux != NULL ? x->aux->keyid : NULL;
+    if (out_len != NULL) {
+        *out_len = keyid != NULL ? keyid->length : 0;
+    }
+    return keyid != NULL ? keyid->data : NULL;
 }
 
 int X509_add1_trust_object(X509 *x, ASN1_OBJECT *obj)
diff --git a/src/crypto/x509v3/internal.h b/src/crypto/x509v3/internal.h
index 3e6081b..976e34d 100644
--- a/src/crypto/x509v3/internal.h
+++ b/src/crypto/x509v3/internal.h
@@ -70,21 +70,21 @@
 #endif
 
 
-// x509v3_bytes_to_hex encodes |len| bytes from |buffer| to hex and returns a
+// x509v3_bytes_to_hex encodes |len| bytes from |in| to hex and returns a
 // newly-allocated NUL-terminated string containing the result, or NULL on
 // allocation error.
 //
-// Note this function was historically named |hex_to_string| in OpenSSL, not
-// |string_to_hex|.
-char *x509v3_bytes_to_hex(const unsigned char *buffer, long len);
+// This function was historically named |hex_to_string| in OpenSSL. Despite the
+// name, |hex_to_string| converted to hex.
+OPENSSL_EXPORT char *x509v3_bytes_to_hex(const uint8_t *in, size_t len);
 
 // x509v3_hex_string_to_bytes decodes |str| in hex and returns a newly-allocated
 // array containing the result, or NULL on error. On success, it sets |*len| to
 // the length of the result. Colon separators between bytes in the input are
 // allowed and ignored.
 //
-// Note this function was historically named |string_to_hex| in OpenSSL, not
-// |hex_to_string|.
+// This function was historically named |string_to_hex| in OpenSSL. Despite the
+// name, |string_to_hex| converted from hex.
 unsigned char *x509v3_hex_to_bytes(const char *str, long *len);
 
 // x509v3_name_cmp returns zero if |name| is equal to |cmp| or begins with |cmp|
diff --git a/src/crypto/x509v3/v3_akey.c b/src/crypto/x509v3/v3_akey.c
index 0aba20e..e64e99f 100644
--- a/src/crypto/x509v3/v3_akey.c
+++ b/src/crypto/x509v3/v3_akey.c
@@ -93,10 +93,10 @@
                                                  STACK_OF(CONF_VALUE)
                                                  *extlist)
 {
-    char *tmp = NULL;
     int extlist_was_null = extlist == NULL;
     if (akeyid->keyid) {
-        tmp = x509v3_bytes_to_hex(akeyid->keyid->data, akeyid->keyid->length);
+        char *tmp = x509v3_bytes_to_hex(akeyid->keyid->data,
+                                        akeyid->keyid->length);
         int ok = tmp != NULL && X509V3_add_value("keyid", tmp, &extlist);
         OPENSSL_free(tmp);
         if (!ok) {
@@ -112,10 +112,7 @@
         extlist = tmpextlist;
     }
     if (akeyid->serial) {
-        tmp = x509v3_bytes_to_hex(akeyid->serial->data, akeyid->serial->length);
-        int ok = tmp != NULL && X509V3_add_value("serial", tmp, &extlist);
-        OPENSSL_free(tmp);
-        if (!ok) {
+        if (!X509V3_add_value_int("serial", akeyid->serial, &extlist)) {
             goto err;
         }
     }
diff --git a/src/crypto/x509v3/v3_purp.c b/src/crypto/x509v3/v3_purp.c
index 133839a..909a8db 100644
--- a/src/crypto/x509v3/v3_purp.c
+++ b/src/crypto/x509v3/v3_purp.c
@@ -437,7 +437,7 @@
         return (x->ex_flags & EXFLAG_INVALID) == 0;
     }
 
-    if (!X509_digest(x, EVP_sha1(), x->sha1_hash, NULL))
+    if (!X509_digest(x, EVP_sha256(), x->cert_hash, NULL))
         x->ex_flags |= EXFLAG_INVALID;
     /* V1 should mean no extensions ... */
     if (X509_get_version(x) == X509_VERSION_1)
diff --git a/src/crypto/x509v3/v3_utl.c b/src/crypto/x509v3/v3_utl.c
index 474acf8..960c407 100644
--- a/src/crypto/x509v3/v3_utl.c
+++ b/src/crypto/x509v3/v3_utl.c
@@ -63,6 +63,7 @@
 #include <string.h>
 
 #include <openssl/bn.h>
+#include <openssl/bytestring.h>
 #include <openssl/conf.h>
 #include <openssl/err.h>
 #include <openssl/mem.h>
@@ -467,33 +468,33 @@
 
 /* hex string utilities */
 
-/*
- * Given a buffer of length 'len' return a OPENSSL_malloc'ed string with its
- * hex representation @@@ (Contents of buffer are always kept in ASCII, also
- * on EBCDIC machines)
- */
-
-char *x509v3_bytes_to_hex(const unsigned char *buffer, long len)
+char *x509v3_bytes_to_hex(const uint8_t *in, size_t len)
 {
-    char *tmp, *q;
-    const unsigned char *p;
-    int i;
-    static const char hexdig[] = "0123456789ABCDEF";
-    if (!buffer || !len)
-        return NULL;
-    if (!(tmp = OPENSSL_malloc(len * 3 + 1))) {
-        OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
-        return NULL;
+    CBB cbb;
+    if (!CBB_init(&cbb, len * 3 + 1)) {
+        goto err;
     }
-    q = tmp;
-    for (i = 0, p = buffer; i < len; i++, p++) {
-        *q++ = hexdig[(*p >> 4) & 0xf];
-        *q++ = hexdig[*p & 0xf];
-        *q++ = ':';
+    for (size_t i = 0; i < len; i++) {
+        static const char hex[] = "0123456789ABCDEF";
+        if ((i > 0 && !CBB_add_u8(&cbb, ':')) ||
+            !CBB_add_u8(&cbb, hex[in[i] >> 4]) ||
+            !CBB_add_u8(&cbb, hex[in[i] & 0xf])) {
+            goto err;
+        }
     }
-    q[-1] = 0;
+    uint8_t *ret;
+    size_t unused_len;
+    if (!CBB_add_u8(&cbb, 0) ||
+        !CBB_finish(&cbb, &ret, &unused_len)) {
+        goto err;
+    }
 
-    return tmp;
+    return (char *)ret;
+
+err:
+    OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
+    CBB_cleanup(&cbb);
+    return NULL;
 }
 
 unsigned char *x509v3_hex_to_bytes(const char *str, long *len)
diff --git a/src/include/openssl/asn1.h b/src/include/openssl/asn1.h
index d8a371c..d6fa2f7 100644
--- a/src/include/openssl/asn1.h
+++ b/src/include/openssl/asn1.h
@@ -605,7 +605,9 @@
 OPENSSL_EXPORT int ASN1_STRING_cmp(const ASN1_STRING *a, const ASN1_STRING *b);
 
 // ASN1_STRING_set sets the contents of |str| to a copy of |len| bytes from
-// |data|. It returns one on success and zero on error.
+// |data|. It returns one on success and zero on error. If |data| is NULL, it
+// updates the length and allocates the buffer as needed, but does not
+// initialize the contents.
 OPENSSL_EXPORT int ASN1_STRING_set(ASN1_STRING *str, const void *data, int len);
 
 // ASN1_STRING_set0 sets the contents of |str| to |len| bytes from |data|. It
@@ -1014,6 +1016,12 @@
 // |V_ASN1_INTEGER| or |V_ASN1_ENUMERATED|, while negative values have a type of
 // |V_ASN1_NEG_INTEGER| or |V_ASN1_NEG_ENUMERATED|. Note this differs from DER's
 // two's complement representation.
+//
+// The data in the |ASN1_STRING| may not have leading zeros. Note this means
+// zero is represented as the empty string. Parsing functions will never return
+// invalid representations. If an invalid input is constructed, the marshaling
+// functions will skip leading zeros, however other functions, such as
+// |ASN1_INTEGER_cmp| or |ASN1_INTEGER_get|, may not return the correct result.
 
 DEFINE_STACK_OF(ASN1_INTEGER)
 
@@ -1068,16 +1076,25 @@
 // |ASN1_INTEGER*|.
 DECLARE_ASN1_ITEM(ASN1_INTEGER)
 
-// ASN1_INTEGER_set sets |a| to an INTEGER with value |v|. It returns one on
-// success and zero on error.
-OPENSSL_EXPORT int ASN1_INTEGER_set(ASN1_INTEGER *a, long v);
-
 // ASN1_INTEGER_set_uint64 sets |a| to an INTEGER with value |v|. It returns one
 // on success and zero on error.
 OPENSSL_EXPORT int ASN1_INTEGER_set_uint64(ASN1_INTEGER *out, uint64_t v);
 
+// ASN1_INTEGER_set sets |a| to an INTEGER with value |v|. It returns one on
+// success and zero on error.
+OPENSSL_EXPORT int ASN1_INTEGER_set(ASN1_INTEGER *a, long v);
+
+// ASN1_INTEGER_get_uint64 converts |a| to a |uint64_t|. On success, it returns
+// one and sets |*out| to the result. If |a| did not fit or has the wrong type,
+// it returns zero.
+OPENSSL_EXPORT int ASN1_INTEGER_get_uint64(uint64_t *out,
+                                           const ASN1_INTEGER *a);
+
 // ASN1_INTEGER_get returns the value of |a| as a |long|, or -1 if |a| is out of
 // range or the wrong type.
+//
+// WARNING: This function's return value cannot distinguish errors from -1.
+// Prefer |ASN1_INTEGER_get_uint64|.
 OPENSSL_EXPORT long ASN1_INTEGER_get(const ASN1_INTEGER *a);
 
 // BN_to_ASN1_INTEGER sets |ai| to an INTEGER with value |bn| and returns |ai|
@@ -1123,18 +1140,31 @@
 // |ASN1_ENUMERATED*|.
 DECLARE_ASN1_ITEM(ASN1_ENUMERATED)
 
+// ASN1_ENUMERATED_set_uint64 sets |out| to an ENUMERATED with value |v|. It
+// returns one on success and zero on error.
+OPENSSL_EXPORT int ASN1_ENUMERATED_set_uint64(ASN1_ENUMERATED *out, uint64_t v);
+
 // ASN1_ENUMERATED_set sets |a| to an ENUMERATED with value |v|. It returns one
 // on success and zero on error.
 OPENSSL_EXPORT int ASN1_ENUMERATED_set(ASN1_ENUMERATED *a, long v);
 
+// ASN1_ENUMERATED_get_uint64 converts |a| to a |uint64_t|. On success, it
+// returns one and sets |*out| to the result. If |a| did not fit or has the
+// wrong type, it returns zero.
+OPENSSL_EXPORT int ASN1_ENUMERATED_get_uint64(uint64_t *out,
+                                              const ASN1_ENUMERATED *a);
+
 // ASN1_ENUMERATED_get returns the value of |a| as a |long|, or -1 if |a| is out
 // of range or the wrong type.
+//
+// WARNING: This function's return value cannot distinguish errors from -1.
+// Prefer |ASN1_ENUMERATED_get_uint64|.
 OPENSSL_EXPORT long ASN1_ENUMERATED_get(const ASN1_ENUMERATED *a);
 
 // BN_to_ASN1_ENUMERATED sets |ai| to an ENUMERATED with value |bn| and returns
 // |ai| on success or NULL or error. If |ai| is NULL, it returns a
-// newly-allocated |ASN1_INTEGER| on success instead, which the caller must
-// release with |ASN1_INTEGER_free|.
+// newly-allocated |ASN1_ENUMERATED| on success instead, which the caller must
+// release with |ASN1_ENUMERATED_free|.
 OPENSSL_EXPORT ASN1_ENUMERATED *BN_to_ASN1_ENUMERATED(const BIGNUM *bn,
                                                       ASN1_ENUMERATED *ai);
 
@@ -2035,5 +2065,7 @@
 #define ASN1_R_NESTED_TOO_DEEP 192
 #define ASN1_R_BAD_TEMPLATE 193
 #define ASN1_R_INVALID_BIT_STRING_PADDING 194
+#define ASN1_R_WRONG_INTEGER_TYPE 195
+#define ASN1_R_INVALID_INTEGER 196
 
 #endif
diff --git a/src/include/openssl/asn1t.h b/src/include/openssl/asn1t.h
index dccbd1a..b65272d 100644
--- a/src/include/openssl/asn1t.h
+++ b/src/include/openssl/asn1t.h
@@ -260,7 +260,6 @@
 /* Any defined by macros: the field used is in the table itself */
 
 #define ASN1_ADB_OBJECT(tblname) { ASN1_TFLG_ADB_OID, -1, 0, #tblname, (const ASN1_ITEM *)&(tblname##_adb) }
-#define ASN1_ADB_INTEGER(tblname) { ASN1_TFLG_ADB_INT, -1, 0, #tblname, (const ASN1_ITEM *)&(tblname##_adb) }
 /* Plain simple type */
 #define ASN1_SIMPLE(stname, field, type) ASN1_EX_TYPE(0,0, stname, field, type)
 
@@ -377,7 +376,7 @@
 };
 
 struct ASN1_ADB_TABLE_st {
-	long value;		/* NID for an object or value for an int */
+	int value;		/* NID for an object */
 	const ASN1_TEMPLATE tt;		/* item for this value */
 };
 
@@ -442,8 +441,6 @@
 
 #define ASN1_TFLG_ADB_OID	(0x1<<8)
 
-#define ASN1_TFLG_ADB_INT	(0x1<<9)
-
 /* This flag means a parent structure is passed
  * instead of the field: this is useful is a
  * SEQUENCE is being combined with a CHOICE for
diff --git a/src/include/openssl/base.h b/src/include/openssl/base.h
index 983eadc..b630236 100644
--- a/src/include/openssl/base.h
+++ b/src/include/openssl/base.h
@@ -195,7 +195,7 @@
 // A consumer may use this symbol in the preprocessor to temporarily build
 // against multiple revisions of BoringSSL at the same time. It is not
 // recommended to do so for longer than is necessary.
-#define BORINGSSL_API_VERSION 16
+#define BORINGSSL_API_VERSION 17
 
 #if defined(BORINGSSL_SHARED_LIBRARY)
 
@@ -448,7 +448,6 @@
 typedef struct trust_token_method_st TRUST_TOKEN_METHOD;
 typedef struct v3_ext_ctx X509V3_CTX;
 typedef struct x509_attributes_st X509_ATTRIBUTE;
-typedef struct x509_cert_aux_st X509_CERT_AUX;
 typedef struct x509_crl_method_st X509_CRL_METHOD;
 typedef struct x509_lookup_st X509_LOOKUP;
 typedef struct x509_lookup_method_st X509_LOOKUP_METHOD;
diff --git a/src/include/openssl/bn.h b/src/include/openssl/bn.h
index a95a894..d9491a9 100644
--- a/src/include/openssl/bn.h
+++ b/src/include/openssl/bn.h
@@ -584,9 +584,14 @@
                                         const BIGNUM *m);
 
 // BN_mod_sqrt returns a newly-allocated |BIGNUM|, r, such that
-// r^2 == a (mod p). |p| must be a prime. It returns NULL on error or if |a| is
-// not a square mod |p|. In the latter case, it will add |BN_R_NOT_A_SQUARE| to
-// the error queue.
+// r^2 == a (mod p). It returns NULL on error or if |a| is not a square mod |p|.
+// In the latter case, it will add |BN_R_NOT_A_SQUARE| to the error queue.
+// If |a| is a square and |p| > 2, there are two possible square roots. This
+// function may return either and may even select one non-deterministically.
+//
+// This function only works if |p| is a prime. If |p| is composite, it may fail
+// or return an arbitrary value. Callers should not pass attacker-controlled
+// values of |p|.
 OPENSSL_EXPORT BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p,
                                    BN_CTX *ctx);
 
diff --git a/src/include/openssl/bytestring.h b/src/include/openssl/bytestring.h
index 5ef3742..199d89c 100644
--- a/src/include/openssl/bytestring.h
+++ b/src/include/openssl/bytestring.h
@@ -259,15 +259,17 @@
 
 // CBS_get_any_ber_asn1_element acts the same as |CBS_get_any_asn1_element| but
 // also allows indefinite-length elements to be returned and does not enforce
-// that lengths are minimal. For indefinite-lengths, |*out_header_len| and
+// that lengths are minimal. It sets |*out_indefinite| to one if the length was
+// indefinite and zero otherwise. If indefinite, |*out_header_len| and
 // |CBS_len(out)| will be equal as only the header is returned (although this is
-// also true for empty elements so the length must be checked too). If
+// also true for empty elements so |*out_indefinite| should be checked). If
 // |out_ber_found| is not NULL then it is set to one if any case of invalid DER
 // but valid BER is found, and to zero otherwise.
 OPENSSL_EXPORT int CBS_get_any_ber_asn1_element(CBS *cbs, CBS *out,
                                                 unsigned *out_tag,
                                                 size_t *out_header_len,
-                                                int *out_ber_found);
+                                                int *out_ber_found,
+                                                int *out_indefinite);
 
 // CBS_get_asn1_uint64 gets an ASN.1 INTEGER from |cbs| using |CBS_get_asn1|
 // and sets |*out| to its value. It returns one on success and zero on error,
diff --git a/src/include/openssl/crypto.h b/src/include/openssl/crypto.h
index 0824b93..117b347 100644
--- a/src/include/openssl/crypto.h
+++ b/src/include/openssl/crypto.h
@@ -59,6 +59,12 @@
 // success and zero on error.
 OPENSSL_EXPORT int BORINGSSL_self_test(void);
 
+// BORINGSSL_integrity_test triggers the module's integrity test where the code
+// and data of the module are matched against a hash injected at build time. It
+// returns one on success or zero if there's a mismatch. This function only
+// exists if the module was built in FIPS mode without ASAN.
+OPENSSL_EXPORT int BORINGSSL_integrity_test(void);
+
 // CRYPTO_pre_sandbox_init initializes the crypto library, pre-acquiring some
 // unusual resources to aid running in sandboxed environments. It is safe to
 // call this function multiple times and concurrently from multiple threads.
@@ -172,6 +178,18 @@
 // |BORINGSSL_FIPS| and zero otherwise.
 OPENSSL_EXPORT int FIPS_mode_set(int on);
 
+// FIPS_version returns the version of the FIPS module, or zero if the build
+// isn't exactly at a verified version. The version, expressed in base 10, will
+// be a date in the form yyyymmddXX where XX is often "00", but can be
+// incremented if multiple versions are defined on a single day.
+//
+// (This format exceeds a |uint32_t| in the year 4294.)
+OPENSSL_EXPORT uint32_t FIPS_version(void);
+
+// FIPS_query_algorithm_status returns one if |algorithm| is FIPS validated in
+// the current BoringSSL and zero otherwise.
+OPENSSL_EXPORT int FIPS_query_algorithm_status(const char *algorithm);
+
 
 #if defined(__cplusplus)
 }  // extern C
diff --git a/src/include/openssl/ec.h b/src/include/openssl/ec.h
index cc8138d..8339bfb 100644
--- a/src/include/openssl/ec.h
+++ b/src/include/openssl/ec.h
@@ -323,7 +323,15 @@
 // |EC_GROUP_cmp| (even to themselves). |EC_GROUP_get_curve_name| will always
 // return |NID_undef|.
 //
-// Avoid using arbitrary curves and use |EC_GROUP_new_by_curve_name| instead.
+// This function is provided for compatibility with some legacy applications
+// only. Avoid using arbitrary curves and use |EC_GROUP_new_by_curve_name|
+// instead. This ensures the result meets preconditions necessary for
+// elliptic curve algorithms to function correctly and securely.
+//
+// Given invalid parameters, this function may fail or it may return an
+// |EC_GROUP| which breaks these preconditions. Subsequent operations may then
+// return arbitrary, incorrect values. Callers should not pass
+// attacker-controlled values to this function.
 OPENSSL_EXPORT EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p,
                                                 const BIGNUM *a,
                                                 const BIGNUM *b, BN_CTX *ctx);
diff --git a/src/include/openssl/ec_key.h b/src/include/openssl/ec_key.h
index 3a40856..502bfc2 100644
--- a/src/include/openssl/ec_key.h
+++ b/src/include/openssl/ec_key.h
@@ -167,8 +167,9 @@
 // about the problem can be found on the error stack.
 OPENSSL_EXPORT int EC_KEY_check_key(const EC_KEY *key);
 
-// EC_KEY_check_fips performs a signing pairwise consistency test (FIPS 140-2
-// 4.9.2). It returns one if it passes and zero otherwise.
+// EC_KEY_check_fips performs both a signing pairwise consistency test
+// (FIPS 140-2 4.9.2) and the consistency test from SP 800-56Ar3 section
+// 5.6.2.1.4. It returns one if it passes and zero otherwise.
 OPENSSL_EXPORT int EC_KEY_check_fips(const EC_KEY *key);
 
 // EC_KEY_set_public_key_affine_coordinates sets the public key in |key| to
@@ -194,7 +195,9 @@
 OPENSSL_EXPORT int EC_KEY_generate_key(EC_KEY *key);
 
 // EC_KEY_generate_key_fips behaves like |EC_KEY_generate_key| but performs
-// additional checks for FIPS compliance.
+// additional checks for FIPS compliance. This function is applicable when
+// generating keys for either signing/verification or key agreement because
+// both types of consistency check (PCT) are performed.
 OPENSSL_EXPORT int EC_KEY_generate_key_fips(EC_KEY *key);
 
 // EC_KEY_derive_from_secret deterministically derives a private key for |group|
diff --git a/src/include/openssl/span.h b/src/include/openssl/span.h
index 79f1d41..38e9a96 100644
--- a/src/include/openssl/span.h
+++ b/src/include/openssl/span.h
@@ -96,6 +96,16 @@
  private:
   static const size_t npos = static_cast<size_t>(-1);
 
+  // Heuristically test whether C is a container type that can be converted into
+  // a Span by checking for data() and size() member functions.
+  //
+  // TODO(davidben): Require C++14 support and switch to std::enable_if_t.
+  // Perhaps even C++17 now?
+  template <typename C>
+  using EnableIfContainer = typename std::enable_if<
+      std::is_convertible<decltype(std::declval<C>().data()), T *>::value &&
+      std::is_integral<decltype(std::declval<C>().size())>::value>::type;
+
  public:
   constexpr Span() : Span(nullptr, 0) {}
   constexpr Span(T *ptr, size_t len) : data_(ptr), size_(len) {}
@@ -104,27 +114,12 @@
   constexpr Span(T (&array)[N]) : Span(array, N) {}
 
   template <
-      typename C,
-      // TODO(davidben): Switch everything to std::enable_if_t when we remove
-      // support for MSVC 2015. Although we could write our own enable_if_t and
-      // MSVC 2015 has std::enable_if_t anyway, MSVC 2015's SFINAE
-      // implementation is problematic and does not work below unless we write
-      // the ::type at use.
-      //
-      // TODO(davidben): Move this and the identical copy below into an
-      // EnableIfContainer alias when we drop MSVC 2015 support. MSVC 2015's
-      // SFINAE support cannot handle type aliases.
-      typename = typename std::enable_if<
-          std::is_convertible<decltype(std::declval<C>().data()), T *>::value &&
-          std::is_integral<decltype(std::declval<C>().size())>::value>::type,
+      typename C, typename = EnableIfContainer<C>,
       typename = typename std::enable_if<std::is_const<T>::value, C>::type>
   Span(const C &container) : data_(container.data()), size_(container.size()) {}
 
   template <
-      typename C,
-      typename = typename std::enable_if<
-          std::is_convertible<decltype(std::declval<C>().data()), T *>::value &&
-          std::is_integral<decltype(std::declval<C>().size())>::value>::type,
+      typename C, typename = EnableIfContainer<C>,
       typename = typename std::enable_if<!std::is_const<T>::value, C>::type>
   explicit Span(C &container)
       : data_(container.data()), size_(container.size()) {}
diff --git a/src/include/openssl/thread.h b/src/include/openssl/thread.h
index 91706fe..c6e357e 100644
--- a/src/include/openssl/thread.h
+++ b/src/include/openssl/thread.h
@@ -77,14 +77,13 @@
 typedef union crypto_mutex_st {
   void *handle;
 } CRYPTO_MUTEX;
-#elif defined(__MACH__) && defined(__APPLE__)
+#elif !defined(__GLIBC__)
 typedef pthread_rwlock_t CRYPTO_MUTEX;
 #else
-// It is reasonable to include pthread.h on non-Windows systems, however the
-// |pthread_rwlock_t| that we need is hidden under feature flags, and we can't
-// ensure that we'll be able to get it. It's statically asserted that this
-// structure is large enough to contain a |pthread_rwlock_t| by
-// thread_pthread.c.
+// On glibc, |pthread_rwlock_t| is hidden under feature flags, and we can't
+// ensure that we'll be able to get it from a public header. It's statically
+// asserted that this structure is large enough to contain a |pthread_rwlock_t|
+// by thread_pthread.c.
 typedef union crypto_mutex_st {
   double alignment;
   uint8_t padding[3*sizeof(int) + 5*sizeof(unsigned) + 16 + 8];
diff --git a/src/include/openssl/x509.h b/src/include/openssl/x509.h
index 6696988..3633186 100644
--- a/src/include/openssl/x509.h
+++ b/src/include/openssl/x509.h
@@ -858,7 +858,6 @@
 OPENSSL_EXPORT int X509_NAME_set(X509_NAME **xn, X509_NAME *name);
 
 DECLARE_ASN1_FUNCTIONS(X509)
-DECLARE_ASN1_FUNCTIONS(X509_CERT_AUX)
 
 // X509_up_ref adds one to the reference count of |x509| and returns one.
 OPENSSL_EXPORT int X509_up_ref(X509 *x509);
@@ -869,9 +868,6 @@
                                          CRYPTO_EX_free *free_func);
 OPENSSL_EXPORT int X509_set_ex_data(X509 *r, int idx, void *arg);
 OPENSSL_EXPORT void *X509_get_ex_data(X509 *r, int idx);
-OPENSSL_EXPORT int i2d_X509_AUX(X509 *a, unsigned char **pp);
-OPENSSL_EXPORT X509 *d2i_X509_AUX(X509 **a, const unsigned char **pp,
-                                  long length);
 
 // i2d_re_X509_tbs serializes the TBSCertificate portion of |x509|, as described
 // in |i2d_SAMPLE|.
@@ -924,19 +920,81 @@
 // a known NID.
 OPENSSL_EXPORT int X509_get_signature_nid(const X509 *x509);
 
-OPENSSL_EXPORT int X509_alias_set1(X509 *x, const unsigned char *name, int len);
-OPENSSL_EXPORT int X509_keyid_set1(X509 *x, const unsigned char *id, int len);
-OPENSSL_EXPORT unsigned char *X509_alias_get0(X509 *x, int *len);
-OPENSSL_EXPORT unsigned char *X509_keyid_get0(X509 *x, int *len);
-OPENSSL_EXPORT int (*X509_TRUST_set_default(int (*trust)(int, X509 *,
-                                                         int)))(int, X509 *,
-                                                                int);
-OPENSSL_EXPORT int X509_TRUST_set(int *t, int trust);
+
+// Auxiliary properties.
+//
+// |X509| objects optionally maintain auxiliary properties. These are not part
+// of the certificates themselves, and thus are not covered by signatures or
+// preserved by the standard serialization. They are used as inputs or outputs
+// to other functions in this library.
+
+// i2d_X509_AUX marshals |x509| as a DER-encoded X.509 Certificate (RFC 5280),
+// followed optionally by a separate, OpenSSL-specific structure with auxiliary
+// properties. It behaves as described in |i2d_SAMPLE|.
+//
+// Unlike similarly-named functions, this function does not output a single
+// ASN.1 element. Directly embedding the output in a larger ASN.1 structure will
+// not behave correctly.
+OPENSSL_EXPORT int i2d_X509_AUX(X509 *x509, unsigned char **outp);
+
+// d2i_X509_AUX parses up to |length| bytes from |*inp| as a DER-encoded X.509
+// Certificate (RFC 5280), followed optionally by a separate, OpenSSL-specific
+// structure with auxiliary properties. It behaves as described in
+// |d2i_SAMPLE_with_reuse|.
+//
+// Some auxiliary properties affect trust decisions, so this function should not
+// be used with untrusted input.
+//
+// Unlike similarly-named functions, this function does not parse a single
+// ASN.1 element. Trying to parse data directly embedded in a larger ASN.1
+// structure will not behave correctly.
+OPENSSL_EXPORT X509 *d2i_X509_AUX(X509 **x509, const unsigned char **inp,
+                                  long length);
+
+// X509_alias_set1 sets |x509|'s alias to |len| bytes from |name|. If |name| is
+// NULL, the alias is cleared instead. Aliases are not part of the certificate
+// itself and will not be serialized by |i2d_X509|.
+OPENSSL_EXPORT int X509_alias_set1(X509 *x509, const unsigned char *name,
+                                   int len);
+
+// X509_keyid_set1 sets |x509|'s key ID to |len| bytes from |id|. If |id| is
+// NULL, the key ID is cleared instead. Key IDs are not part of the certificate
+// itself and will not be serialized by |i2d_X509|.
+OPENSSL_EXPORT int X509_keyid_set1(X509 *x509, const unsigned char *id,
+                                   int len);
+
+// X509_alias_get0 looks up |x509|'s alias. If found, it sets |*out_len| to the
+// alias's length and returns a pointer to a buffer containing the contents. If
+// not found, it outputs the empty string by returning NULL and setting
+// |*out_len| to zero.
+//
+// If |x509| was parsed from a PKCS#12 structure (see
+// |PKCS12_get_key_and_certs|), the alias will reflect the friendlyName
+// attribute (RFC 2985).
+//
+// WARNING: In OpenSSL, this function did not set |*out_len| when the alias was
+// missing. Callers that target both OpenSSL and BoringSSL should set the value
+// to zero before calling this function.
+OPENSSL_EXPORT unsigned char *X509_alias_get0(X509 *x509, int *out_len);
+
+// X509_keyid_get0 looks up |x509|'s key ID. If found, it sets |*out_len| to the
+// key ID's length and returns a pointer to a buffer containing the contents. If
+// not found, it outputs the empty string by returning NULL and setting
+// |*out_len| to zero.
+//
+// WARNING: In OpenSSL, this function did not set |*out_len| when the key ID
+// was missing. Callers that target both OpenSSL and BoringSSL should set the
+// value to zero before calling this function.
+OPENSSL_EXPORT unsigned char *X509_keyid_get0(X509 *x509, int *out_len);
+
 OPENSSL_EXPORT int X509_add1_trust_object(X509 *x, ASN1_OBJECT *obj);
 OPENSSL_EXPORT int X509_add1_reject_object(X509 *x, ASN1_OBJECT *obj);
 OPENSSL_EXPORT void X509_trust_clear(X509 *x);
 OPENSSL_EXPORT void X509_reject_clear(X509 *x);
 
+
+OPENSSL_EXPORT int X509_TRUST_set(int *t, int trust);
+
 DECLARE_ASN1_FUNCTIONS(X509_REVOKED)
 DECLARE_ASN1_FUNCTIONS(X509_CRL)
 
@@ -1300,7 +1358,6 @@
                                  unsigned long cflag);
 OPENSSL_EXPORT int X509_print(BIO *bp, X509 *x);
 OPENSSL_EXPORT int X509_ocspid_print(BIO *bp, X509 *x);
-OPENSSL_EXPORT int X509_CERT_AUX_print(BIO *bp, X509_CERT_AUX *x, int indent);
 OPENSSL_EXPORT int X509_CRL_print(BIO *bp, X509_CRL *x);
 OPENSSL_EXPORT int X509_REQ_print_ex(BIO *bp, X509_REQ *x, unsigned long nmflag,
                                      unsigned long cflag);
diff --git a/src/rust/build.rs b/src/rust/build.rs
index f6ce794..b029223 100644
--- a/src/rust/build.rs
+++ b/src/rust/build.rs
@@ -13,14 +13,27 @@
  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+use std::env;
+use std::path::Path;
+
 fn main() {
+    let dir = env::var("CARGO_MANIFEST_DIR").unwrap();
+    let crate_path = Path::new(&dir);
+    let parent_path = crate_path.parent().unwrap();
+
     // Statically link libraries.
-    println!("cargo:rustc-link-search=native=../crypto");
+    println!(
+        "cargo:rustc-link-search=native={}",
+        parent_path.join("crypto").display()
+    );
     println!("cargo:rustc-link-lib=static=crypto");
 
-    println!("cargo:rustc-link-search=native=../ssl");
+    println!(
+        "cargo:rustc-link-search=native={}",
+        parent_path.join("ssl").display()
+    );
     println!("cargo:rustc-link-lib=static=ssl");
 
-    println!("cargo:rustc-link-search=native=.");
+    println!("cargo:rustc-link-search=native={}", crate_path.display());
     println!("cargo:rustc-link-lib=static=rust_wrapper");
 }
diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs
index d8c2c00..b691bab 100644
--- a/src/rust/src/lib.rs
+++ b/src/rust/src/lib.rs
@@ -2,8 +2,8 @@
 #![allow(non_camel_case_types)]
 #![allow(non_snake_case)]
 
-// populated by cmake
-${INCLUDES}
+// ANDROID: Use Soong-generated bindings rather than CMake-generated
+pub use bssl_sys_raw::*;
 
 pub fn ERR_GET_LIB(packed_error: u32) -> i32 {
     unsafe { ERR_GET_LIB_RUST(packed_error) }
diff --git a/src/ssl/handshake_server.cc b/src/ssl/handshake_server.cc
index 15820be..7678904 100644
--- a/src/ssl/handshake_server.cc
+++ b/src/ssl/handshake_server.cc
@@ -418,7 +418,7 @@
   // JDK 11 always sends extensions in a particular order.
   constexpr uint16_t kMaxFragmentLength = 0x0001;
   constexpr uint16_t kStatusRequestV2 = 0x0011;
-  static CONSTEXPR_ARRAY struct {
+  static constexpr struct {
     uint16_t id;
     bool required;
   } kJavaExtensions[] = {
diff --git a/src/ssl/internal.h b/src/ssl/internal.h
index 8f68fc5..0087e7f 100644
--- a/src/ssl/internal.h
+++ b/src/ssl/internal.h
@@ -245,14 +245,6 @@
   { abort(); }
 #endif
 
-// CONSTEXPR_ARRAY works around a VS 2015 bug where ranged for loops don't work
-// on constexpr arrays.
-#if defined(_MSC_VER) && !defined(__clang__) && _MSC_VER < 1910
-#define CONSTEXPR_ARRAY const
-#else
-#define CONSTEXPR_ARRAY constexpr
-#endif
-
 // Array<T> is an owning array of elements of |T|.
 template <typename T>
 class Array {
diff --git a/src/ssl/ssl_key_share.cc b/src/ssl/ssl_key_share.cc
index c847a0a..920f25e 100644
--- a/src/ssl/ssl_key_share.cc
+++ b/src/ssl/ssl_key_share.cc
@@ -290,7 +290,7 @@
   HRSS_private_key hrss_private_key_;
 };
 
-CONSTEXPR_ARRAY NamedGroup kNamedGroups[] = {
+constexpr NamedGroup kNamedGroups[] = {
     {NID_secp224r1, SSL_CURVE_SECP224R1, "P-224", "secp224r1"},
     {NID_X9_62_prime256v1, SSL_CURVE_SECP256R1, "P-256", "prime256v1"},
     {NID_secp384r1, SSL_CURVE_SECP384R1, "P-384", "secp384r1"},
diff --git a/src/third_party/fiat/LICENSE b/src/third_party/fiat/LICENSE
index bd46c61..70cae03 100644
--- a/src/third_party/fiat/LICENSE
+++ b/src/third_party/fiat/LICENSE
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2015-2016 the fiat-crypto authors (see
+Copyright (c) 2015-2020 the fiat-crypto authors (see
 https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS).
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
diff --git a/src/third_party/fiat/METADATA b/src/third_party/fiat/METADATA
index e527c14..339fe5f 100644
--- a/src/third_party/fiat/METADATA
+++ b/src/third_party/fiat/METADATA
@@ -6,8 +6,8 @@
     type: GIT
     value: "https://github.com/mit-plv/fiat-crypto"
   }
-  version: "0884b6d374a9d937c44bf024fe3a647ffae2c540"
-  last_upgrade_date { year: 2020 month: 4 day: 16 }
+  version: "6ccc6638716d4632304baf1adbb5c47c3a12ea6f"
+  last_upgrade_date { year: 2022 month: 3 day: 22 }
 
-  local_modifications: "Files renamed to .h for BoringSSL integration. Select functions patched with value barriers."
+  local_modifications: "Files renamed to .h for BoringSSL integration. LICENSE file is LICENSE-MIT from upstream."
 }
diff --git a/src/third_party/fiat/curve25519_32.h b/src/third_party/fiat/curve25519_32.h
index 7b78d00..cb83c60 100644
--- a/src/third_party/fiat/curve25519_32.h
+++ b/src/third_party/fiat/curve25519_32.h
@@ -1,24 +1,51 @@
-/* Autogenerated: src/ExtractionOCaml/unsaturated_solinas --static 25519 10 '2^255 - 19' 32 carry_mul carry_square carry add sub opp selectznz to_bytes from_bytes carry_scmul121666 */
+/* Autogenerated: 'src/ExtractionOCaml/unsaturated_solinas' --inline --static --use-value-barrier 25519 32 '(auto)' '2^255 - 19' carry_mul carry_square carry add sub opp selectznz to_bytes from_bytes relax carry_scmul121666 */
 /* curve description: 25519 */
-/* requested operations: carry_mul, carry_square, carry, add, sub, opp, selectznz, to_bytes, from_bytes, carry_scmul121666 */
-/* n = 10 (from "10") */
-/* s-c = 2^255 - [(1, 19)] (from "2^255 - 19") */
 /* machine_wordsize = 32 (from "32") */
-
+/* requested operations: carry_mul, carry_square, carry, add, sub, opp, selectznz, to_bytes, from_bytes, relax, carry_scmul121666 */
+/* n = 10 (from "(auto)") */
+/* s-c = 2^255 - [(1, 19)] (from "2^255 - 19") */
+/* tight_bounds_multiplier = 1 (from "") */
+/*  */
 /* Computed values: */
-/* carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1] */
+/*   carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1] */
+/*   eval z = z[0] + (z[1] << 26) + (z[2] << 51) + (z[3] << 77) + (z[4] << 102) + (z[5] << 128) + (z[6] << 153) + (z[7] << 179) + (z[8] << 204) + (z[9] << 230) */
+/*   bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) */
+/*   balance = [0x7ffffda, 0x3fffffe, 0x7fffffe, 0x3fffffe, 0x7fffffe, 0x3fffffe, 0x7fffffe, 0x3fffffe, 0x7fffffe, 0x3fffffe] */
 
 #include <stdint.h>
 typedef unsigned char fiat_25519_uint1;
 typedef signed char fiat_25519_int1;
+#if defined(__GNUC__) || defined(__clang__)
+#  define FIAT_25519_FIAT_INLINE __inline__
+#else
+#  define FIAT_25519_FIAT_INLINE
+#endif
+
+/* The type fiat_25519_loose_field_element is a field element with loose bounds. */
+/* Bounds: [[0x0 ~> 0xc000000], [0x0 ~> 0x6000000], [0x0 ~> 0xc000000], [0x0 ~> 0x6000000], [0x0 ~> 0xc000000], [0x0 ~> 0x6000000], [0x0 ~> 0xc000000], [0x0 ~> 0x6000000], [0x0 ~> 0xc000000], [0x0 ~> 0x6000000]] */
+typedef uint32_t fiat_25519_loose_field_element[10];
+
+/* The type fiat_25519_tight_field_element is a field element with tight bounds. */
+/* Bounds: [[0x0 ~> 0x4000000], [0x0 ~> 0x2000000], [0x0 ~> 0x4000000], [0x0 ~> 0x2000000], [0x0 ~> 0x4000000], [0x0 ~> 0x2000000], [0x0 ~> 0x4000000], [0x0 ~> 0x2000000], [0x0 ~> 0x4000000], [0x0 ~> 0x2000000]] */
+typedef uint32_t fiat_25519_tight_field_element[10];
 
 #if (-1 & 3) != 3
 #error "This code only works on a two's complement system"
 #endif
 
+#if !defined(FIAT_25519_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
+static __inline__ uint32_t fiat_25519_value_barrier_u32(uint32_t a) {
+  __asm__("" : "+r"(a) : /* no inputs */);
+  return a;
+}
+#else
+#  define fiat_25519_value_barrier_u32(x) (x)
+#endif
+
 
 /*
  * The function fiat_25519_addcarryx_u26 is an addition with carry.
+ *
  * Postconditions:
  *   out1 = (arg1 + arg2 + arg3) mod 2^26
  *   out2 = ⌊(arg1 + arg2 + arg3) / 2^26⌋
@@ -31,16 +58,20 @@
  *   out1: [0x0 ~> 0x3ffffff]
  *   out2: [0x0 ~> 0x1]
  */
-static void fiat_25519_addcarryx_u26(uint32_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) {
-  uint32_t x1 = ((arg1 + arg2) + arg3);
-  uint32_t x2 = (x1 & UINT32_C(0x3ffffff));
-  fiat_25519_uint1 x3 = (fiat_25519_uint1)(x1 >> 26);
+static FIAT_25519_FIAT_INLINE void fiat_25519_addcarryx_u26(uint32_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) {
+  uint32_t x1;
+  uint32_t x2;
+  fiat_25519_uint1 x3;
+  x1 = ((arg1 + arg2) + arg3);
+  x2 = (x1 & UINT32_C(0x3ffffff));
+  x3 = (fiat_25519_uint1)(x1 >> 26);
   *out1 = x2;
   *out2 = x3;
 }
 
 /*
  * The function fiat_25519_subborrowx_u26 is a subtraction with borrow.
+ *
  * Postconditions:
  *   out1 = (-arg1 + arg2 + -arg3) mod 2^26
  *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^26⌋
@@ -53,16 +84,20 @@
  *   out1: [0x0 ~> 0x3ffffff]
  *   out2: [0x0 ~> 0x1]
  */
-static void fiat_25519_subborrowx_u26(uint32_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) {
-  int32_t x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3);
-  fiat_25519_int1 x2 = (fiat_25519_int1)(x1 >> 26);
-  uint32_t x3 = (x1 & UINT32_C(0x3ffffff));
+static FIAT_25519_FIAT_INLINE void fiat_25519_subborrowx_u26(uint32_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) {
+  int32_t x1;
+  fiat_25519_int1 x2;
+  uint32_t x3;
+  x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3);
+  x2 = (fiat_25519_int1)(x1 >> 26);
+  x3 = (x1 & UINT32_C(0x3ffffff));
   *out1 = x3;
   *out2 = (fiat_25519_uint1)(0x0 - x2);
 }
 
 /*
  * The function fiat_25519_addcarryx_u25 is an addition with carry.
+ *
  * Postconditions:
  *   out1 = (arg1 + arg2 + arg3) mod 2^25
  *   out2 = ⌊(arg1 + arg2 + arg3) / 2^25⌋
@@ -75,16 +110,20 @@
  *   out1: [0x0 ~> 0x1ffffff]
  *   out2: [0x0 ~> 0x1]
  */
-static void fiat_25519_addcarryx_u25(uint32_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) {
-  uint32_t x1 = ((arg1 + arg2) + arg3);
-  uint32_t x2 = (x1 & UINT32_C(0x1ffffff));
-  fiat_25519_uint1 x3 = (fiat_25519_uint1)(x1 >> 25);
+static FIAT_25519_FIAT_INLINE void fiat_25519_addcarryx_u25(uint32_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) {
+  uint32_t x1;
+  uint32_t x2;
+  fiat_25519_uint1 x3;
+  x1 = ((arg1 + arg2) + arg3);
+  x2 = (x1 & UINT32_C(0x1ffffff));
+  x3 = (fiat_25519_uint1)(x1 >> 25);
   *out1 = x2;
   *out2 = x3;
 }
 
 /*
  * The function fiat_25519_subborrowx_u25 is a subtraction with borrow.
+ *
  * Postconditions:
  *   out1 = (-arg1 + arg2 + -arg3) mod 2^25
  *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^25⌋
@@ -97,16 +136,20 @@
  *   out1: [0x0 ~> 0x1ffffff]
  *   out2: [0x0 ~> 0x1]
  */
-static void fiat_25519_subborrowx_u25(uint32_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) {
-  int32_t x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3);
-  fiat_25519_int1 x2 = (fiat_25519_int1)(x1 >> 25);
-  uint32_t x3 = (x1 & UINT32_C(0x1ffffff));
+static FIAT_25519_FIAT_INLINE void fiat_25519_subborrowx_u25(uint32_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) {
+  int32_t x1;
+  fiat_25519_int1 x2;
+  uint32_t x3;
+  x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3);
+  x2 = (fiat_25519_int1)(x1 >> 25);
+  x3 = (x1 & UINT32_C(0x1ffffff));
   *out1 = x3;
   *out2 = (fiat_25519_uint1)(0x0 - x2);
 }
 
 /*
  * The function fiat_25519_cmovznz_u32 is a single-word conditional move.
+ *
  * Postconditions:
  *   out1 = (if arg1 = 0 then arg2 else arg3)
  *
@@ -117,178 +160,318 @@
  * Output Bounds:
  *   out1: [0x0 ~> 0xffffffff]
  */
-static void fiat_25519_cmovznz_u32(uint32_t* out1, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) {
-  fiat_25519_uint1 x1 = (!(!arg1));
-  uint32_t x2 = ((fiat_25519_int1)(0x0 - x1) & UINT32_C(0xffffffff));
-  // Note this line has been patched from the synthesized code to add value
-  // barriers.
-  //
-  // Clang recognizes this pattern as a select. While it usually transforms it
-  // to a cmov, it sometimes further transforms it into a branch, which we do
-  // not want.
-  uint32_t x3 = ((value_barrier_u32(x2) & arg3) | (value_barrier_u32(~x2) & arg2));
+static FIAT_25519_FIAT_INLINE void fiat_25519_cmovznz_u32(uint32_t* out1, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) {
+  fiat_25519_uint1 x1;
+  uint32_t x2;
+  uint32_t x3;
+  x1 = (!(!arg1));
+  x2 = ((fiat_25519_int1)(0x0 - x1) & UINT32_C(0xffffffff));
+  x3 = ((fiat_25519_value_barrier_u32(x2) & arg3) | (fiat_25519_value_barrier_u32((~x2)) & arg2));
   *out1 = x3;
 }
 
 /*
  * The function fiat_25519_carry_mul multiplies two field elements and reduces the result.
+ *
  * Postconditions:
  *   eval out1 mod m = (eval arg1 * eval arg2) mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
- *   arg2: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
  */
-static void fiat_25519_carry_mul(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10]) {
-  uint64_t x1 = ((uint64_t)(arg1[9]) * ((arg2[9]) * UINT8_C(0x26)));
-  uint64_t x2 = ((uint64_t)(arg1[9]) * ((arg2[8]) * UINT8_C(0x13)));
-  uint64_t x3 = ((uint64_t)(arg1[9]) * ((arg2[7]) * UINT8_C(0x26)));
-  uint64_t x4 = ((uint64_t)(arg1[9]) * ((arg2[6]) * UINT8_C(0x13)));
-  uint64_t x5 = ((uint64_t)(arg1[9]) * ((arg2[5]) * UINT8_C(0x26)));
-  uint64_t x6 = ((uint64_t)(arg1[9]) * ((arg2[4]) * UINT8_C(0x13)));
-  uint64_t x7 = ((uint64_t)(arg1[9]) * ((arg2[3]) * UINT8_C(0x26)));
-  uint64_t x8 = ((uint64_t)(arg1[9]) * ((arg2[2]) * UINT8_C(0x13)));
-  uint64_t x9 = ((uint64_t)(arg1[9]) * ((arg2[1]) * UINT8_C(0x26)));
-  uint64_t x10 = ((uint64_t)(arg1[8]) * ((arg2[9]) * UINT8_C(0x13)));
-  uint64_t x11 = ((uint64_t)(arg1[8]) * ((arg2[8]) * UINT8_C(0x13)));
-  uint64_t x12 = ((uint64_t)(arg1[8]) * ((arg2[7]) * UINT8_C(0x13)));
-  uint64_t x13 = ((uint64_t)(arg1[8]) * ((arg2[6]) * UINT8_C(0x13)));
-  uint64_t x14 = ((uint64_t)(arg1[8]) * ((arg2[5]) * UINT8_C(0x13)));
-  uint64_t x15 = ((uint64_t)(arg1[8]) * ((arg2[4]) * UINT8_C(0x13)));
-  uint64_t x16 = ((uint64_t)(arg1[8]) * ((arg2[3]) * UINT8_C(0x13)));
-  uint64_t x17 = ((uint64_t)(arg1[8]) * ((arg2[2]) * UINT8_C(0x13)));
-  uint64_t x18 = ((uint64_t)(arg1[7]) * ((arg2[9]) * UINT8_C(0x26)));
-  uint64_t x19 = ((uint64_t)(arg1[7]) * ((arg2[8]) * UINT8_C(0x13)));
-  uint64_t x20 = ((uint64_t)(arg1[7]) * ((arg2[7]) * UINT8_C(0x26)));
-  uint64_t x21 = ((uint64_t)(arg1[7]) * ((arg2[6]) * UINT8_C(0x13)));
-  uint64_t x22 = ((uint64_t)(arg1[7]) * ((arg2[5]) * UINT8_C(0x26)));
-  uint64_t x23 = ((uint64_t)(arg1[7]) * ((arg2[4]) * UINT8_C(0x13)));
-  uint64_t x24 = ((uint64_t)(arg1[7]) * ((arg2[3]) * UINT8_C(0x26)));
-  uint64_t x25 = ((uint64_t)(arg1[6]) * ((arg2[9]) * UINT8_C(0x13)));
-  uint64_t x26 = ((uint64_t)(arg1[6]) * ((arg2[8]) * UINT8_C(0x13)));
-  uint64_t x27 = ((uint64_t)(arg1[6]) * ((arg2[7]) * UINT8_C(0x13)));
-  uint64_t x28 = ((uint64_t)(arg1[6]) * ((arg2[6]) * UINT8_C(0x13)));
-  uint64_t x29 = ((uint64_t)(arg1[6]) * ((arg2[5]) * UINT8_C(0x13)));
-  uint64_t x30 = ((uint64_t)(arg1[6]) * ((arg2[4]) * UINT8_C(0x13)));
-  uint64_t x31 = ((uint64_t)(arg1[5]) * ((arg2[9]) * UINT8_C(0x26)));
-  uint64_t x32 = ((uint64_t)(arg1[5]) * ((arg2[8]) * UINT8_C(0x13)));
-  uint64_t x33 = ((uint64_t)(arg1[5]) * ((arg2[7]) * UINT8_C(0x26)));
-  uint64_t x34 = ((uint64_t)(arg1[5]) * ((arg2[6]) * UINT8_C(0x13)));
-  uint64_t x35 = ((uint64_t)(arg1[5]) * ((arg2[5]) * UINT8_C(0x26)));
-  uint64_t x36 = ((uint64_t)(arg1[4]) * ((arg2[9]) * UINT8_C(0x13)));
-  uint64_t x37 = ((uint64_t)(arg1[4]) * ((arg2[8]) * UINT8_C(0x13)));
-  uint64_t x38 = ((uint64_t)(arg1[4]) * ((arg2[7]) * UINT8_C(0x13)));
-  uint64_t x39 = ((uint64_t)(arg1[4]) * ((arg2[6]) * UINT8_C(0x13)));
-  uint64_t x40 = ((uint64_t)(arg1[3]) * ((arg2[9]) * UINT8_C(0x26)));
-  uint64_t x41 = ((uint64_t)(arg1[3]) * ((arg2[8]) * UINT8_C(0x13)));
-  uint64_t x42 = ((uint64_t)(arg1[3]) * ((arg2[7]) * UINT8_C(0x26)));
-  uint64_t x43 = ((uint64_t)(arg1[2]) * ((arg2[9]) * UINT8_C(0x13)));
-  uint64_t x44 = ((uint64_t)(arg1[2]) * ((arg2[8]) * UINT8_C(0x13)));
-  uint64_t x45 = ((uint64_t)(arg1[1]) * ((arg2[9]) * UINT8_C(0x26)));
-  uint64_t x46 = ((uint64_t)(arg1[9]) * (arg2[0]));
-  uint64_t x47 = ((uint64_t)(arg1[8]) * (arg2[1]));
-  uint64_t x48 = ((uint64_t)(arg1[8]) * (arg2[0]));
-  uint64_t x49 = ((uint64_t)(arg1[7]) * (arg2[2]));
-  uint64_t x50 = ((uint64_t)(arg1[7]) * ((arg2[1]) * 0x2));
-  uint64_t x51 = ((uint64_t)(arg1[7]) * (arg2[0]));
-  uint64_t x52 = ((uint64_t)(arg1[6]) * (arg2[3]));
-  uint64_t x53 = ((uint64_t)(arg1[6]) * (arg2[2]));
-  uint64_t x54 = ((uint64_t)(arg1[6]) * (arg2[1]));
-  uint64_t x55 = ((uint64_t)(arg1[6]) * (arg2[0]));
-  uint64_t x56 = ((uint64_t)(arg1[5]) * (arg2[4]));
-  uint64_t x57 = ((uint64_t)(arg1[5]) * ((arg2[3]) * 0x2));
-  uint64_t x58 = ((uint64_t)(arg1[5]) * (arg2[2]));
-  uint64_t x59 = ((uint64_t)(arg1[5]) * ((arg2[1]) * 0x2));
-  uint64_t x60 = ((uint64_t)(arg1[5]) * (arg2[0]));
-  uint64_t x61 = ((uint64_t)(arg1[4]) * (arg2[5]));
-  uint64_t x62 = ((uint64_t)(arg1[4]) * (arg2[4]));
-  uint64_t x63 = ((uint64_t)(arg1[4]) * (arg2[3]));
-  uint64_t x64 = ((uint64_t)(arg1[4]) * (arg2[2]));
-  uint64_t x65 = ((uint64_t)(arg1[4]) * (arg2[1]));
-  uint64_t x66 = ((uint64_t)(arg1[4]) * (arg2[0]));
-  uint64_t x67 = ((uint64_t)(arg1[3]) * (arg2[6]));
-  uint64_t x68 = ((uint64_t)(arg1[3]) * ((arg2[5]) * 0x2));
-  uint64_t x69 = ((uint64_t)(arg1[3]) * (arg2[4]));
-  uint64_t x70 = ((uint64_t)(arg1[3]) * ((arg2[3]) * 0x2));
-  uint64_t x71 = ((uint64_t)(arg1[3]) * (arg2[2]));
-  uint64_t x72 = ((uint64_t)(arg1[3]) * ((arg2[1]) * 0x2));
-  uint64_t x73 = ((uint64_t)(arg1[3]) * (arg2[0]));
-  uint64_t x74 = ((uint64_t)(arg1[2]) * (arg2[7]));
-  uint64_t x75 = ((uint64_t)(arg1[2]) * (arg2[6]));
-  uint64_t x76 = ((uint64_t)(arg1[2]) * (arg2[5]));
-  uint64_t x77 = ((uint64_t)(arg1[2]) * (arg2[4]));
-  uint64_t x78 = ((uint64_t)(arg1[2]) * (arg2[3]));
-  uint64_t x79 = ((uint64_t)(arg1[2]) * (arg2[2]));
-  uint64_t x80 = ((uint64_t)(arg1[2]) * (arg2[1]));
-  uint64_t x81 = ((uint64_t)(arg1[2]) * (arg2[0]));
-  uint64_t x82 = ((uint64_t)(arg1[1]) * (arg2[8]));
-  uint64_t x83 = ((uint64_t)(arg1[1]) * ((arg2[7]) * 0x2));
-  uint64_t x84 = ((uint64_t)(arg1[1]) * (arg2[6]));
-  uint64_t x85 = ((uint64_t)(arg1[1]) * ((arg2[5]) * 0x2));
-  uint64_t x86 = ((uint64_t)(arg1[1]) * (arg2[4]));
-  uint64_t x87 = ((uint64_t)(arg1[1]) * ((arg2[3]) * 0x2));
-  uint64_t x88 = ((uint64_t)(arg1[1]) * (arg2[2]));
-  uint64_t x89 = ((uint64_t)(arg1[1]) * ((arg2[1]) * 0x2));
-  uint64_t x90 = ((uint64_t)(arg1[1]) * (arg2[0]));
-  uint64_t x91 = ((uint64_t)(arg1[0]) * (arg2[9]));
-  uint64_t x92 = ((uint64_t)(arg1[0]) * (arg2[8]));
-  uint64_t x93 = ((uint64_t)(arg1[0]) * (arg2[7]));
-  uint64_t x94 = ((uint64_t)(arg1[0]) * (arg2[6]));
-  uint64_t x95 = ((uint64_t)(arg1[0]) * (arg2[5]));
-  uint64_t x96 = ((uint64_t)(arg1[0]) * (arg2[4]));
-  uint64_t x97 = ((uint64_t)(arg1[0]) * (arg2[3]));
-  uint64_t x98 = ((uint64_t)(arg1[0]) * (arg2[2]));
-  uint64_t x99 = ((uint64_t)(arg1[0]) * (arg2[1]));
-  uint64_t x100 = ((uint64_t)(arg1[0]) * (arg2[0]));
-  uint64_t x101 = (x100 + (x45 + (x44 + (x42 + (x39 + (x35 + (x30 + (x24 + (x17 + x9)))))))));
-  uint64_t x102 = (x101 >> 26);
-  uint32_t x103 = (uint32_t)(x101 & UINT32_C(0x3ffffff));
-  uint64_t x104 = (x91 + (x82 + (x74 + (x67 + (x61 + (x56 + (x52 + (x49 + (x47 + x46)))))))));
-  uint64_t x105 = (x92 + (x83 + (x75 + (x68 + (x62 + (x57 + (x53 + (x50 + (x48 + x1)))))))));
-  uint64_t x106 = (x93 + (x84 + (x76 + (x69 + (x63 + (x58 + (x54 + (x51 + (x10 + x2)))))))));
-  uint64_t x107 = (x94 + (x85 + (x77 + (x70 + (x64 + (x59 + (x55 + (x18 + (x11 + x3)))))))));
-  uint64_t x108 = (x95 + (x86 + (x78 + (x71 + (x65 + (x60 + (x25 + (x19 + (x12 + x4)))))))));
-  uint64_t x109 = (x96 + (x87 + (x79 + (x72 + (x66 + (x31 + (x26 + (x20 + (x13 + x5)))))))));
-  uint64_t x110 = (x97 + (x88 + (x80 + (x73 + (x36 + (x32 + (x27 + (x21 + (x14 + x6)))))))));
-  uint64_t x111 = (x98 + (x89 + (x81 + (x40 + (x37 + (x33 + (x28 + (x22 + (x15 + x7)))))))));
-  uint64_t x112 = (x99 + (x90 + (x43 + (x41 + (x38 + (x34 + (x29 + (x23 + (x16 + x8)))))))));
-  uint64_t x113 = (x102 + x112);
-  uint64_t x114 = (x113 >> 25);
-  uint32_t x115 = (uint32_t)(x113 & UINT32_C(0x1ffffff));
-  uint64_t x116 = (x114 + x111);
-  uint64_t x117 = (x116 >> 26);
-  uint32_t x118 = (uint32_t)(x116 & UINT32_C(0x3ffffff));
-  uint64_t x119 = (x117 + x110);
-  uint64_t x120 = (x119 >> 25);
-  uint32_t x121 = (uint32_t)(x119 & UINT32_C(0x1ffffff));
-  uint64_t x122 = (x120 + x109);
-  uint64_t x123 = (x122 >> 26);
-  uint32_t x124 = (uint32_t)(x122 & UINT32_C(0x3ffffff));
-  uint64_t x125 = (x123 + x108);
-  uint64_t x126 = (x125 >> 25);
-  uint32_t x127 = (uint32_t)(x125 & UINT32_C(0x1ffffff));
-  uint64_t x128 = (x126 + x107);
-  uint64_t x129 = (x128 >> 26);
-  uint32_t x130 = (uint32_t)(x128 & UINT32_C(0x3ffffff));
-  uint64_t x131 = (x129 + x106);
-  uint64_t x132 = (x131 >> 25);
-  uint32_t x133 = (uint32_t)(x131 & UINT32_C(0x1ffffff));
-  uint64_t x134 = (x132 + x105);
-  uint64_t x135 = (x134 >> 26);
-  uint32_t x136 = (uint32_t)(x134 & UINT32_C(0x3ffffff));
-  uint64_t x137 = (x135 + x104);
-  uint64_t x138 = (x137 >> 25);
-  uint32_t x139 = (uint32_t)(x137 & UINT32_C(0x1ffffff));
-  uint64_t x140 = (x138 * UINT8_C(0x13));
-  uint64_t x141 = (x103 + x140);
-  uint32_t x142 = (uint32_t)(x141 >> 26);
-  uint32_t x143 = (uint32_t)(x141 & UINT32_C(0x3ffffff));
-  uint32_t x144 = (x142 + x115);
-  fiat_25519_uint1 x145 = (fiat_25519_uint1)(x144 >> 25);
-  uint32_t x146 = (x144 & UINT32_C(0x1ffffff));
-  uint32_t x147 = (x145 + x118);
+static FIAT_25519_FIAT_INLINE void fiat_25519_carry_mul(fiat_25519_tight_field_element out1, const fiat_25519_loose_field_element arg1, const fiat_25519_loose_field_element arg2) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
+  uint64_t x5;
+  uint64_t x6;
+  uint64_t x7;
+  uint64_t x8;
+  uint64_t x9;
+  uint64_t x10;
+  uint64_t x11;
+  uint64_t x12;
+  uint64_t x13;
+  uint64_t x14;
+  uint64_t x15;
+  uint64_t x16;
+  uint64_t x17;
+  uint64_t x18;
+  uint64_t x19;
+  uint64_t x20;
+  uint64_t x21;
+  uint64_t x22;
+  uint64_t x23;
+  uint64_t x24;
+  uint64_t x25;
+  uint64_t x26;
+  uint64_t x27;
+  uint64_t x28;
+  uint64_t x29;
+  uint64_t x30;
+  uint64_t x31;
+  uint64_t x32;
+  uint64_t x33;
+  uint64_t x34;
+  uint64_t x35;
+  uint64_t x36;
+  uint64_t x37;
+  uint64_t x38;
+  uint64_t x39;
+  uint64_t x40;
+  uint64_t x41;
+  uint64_t x42;
+  uint64_t x43;
+  uint64_t x44;
+  uint64_t x45;
+  uint64_t x46;
+  uint64_t x47;
+  uint64_t x48;
+  uint64_t x49;
+  uint64_t x50;
+  uint64_t x51;
+  uint64_t x52;
+  uint64_t x53;
+  uint64_t x54;
+  uint64_t x55;
+  uint64_t x56;
+  uint64_t x57;
+  uint64_t x58;
+  uint64_t x59;
+  uint64_t x60;
+  uint64_t x61;
+  uint64_t x62;
+  uint64_t x63;
+  uint64_t x64;
+  uint64_t x65;
+  uint64_t x66;
+  uint64_t x67;
+  uint64_t x68;
+  uint64_t x69;
+  uint64_t x70;
+  uint64_t x71;
+  uint64_t x72;
+  uint64_t x73;
+  uint64_t x74;
+  uint64_t x75;
+  uint64_t x76;
+  uint64_t x77;
+  uint64_t x78;
+  uint64_t x79;
+  uint64_t x80;
+  uint64_t x81;
+  uint64_t x82;
+  uint64_t x83;
+  uint64_t x84;
+  uint64_t x85;
+  uint64_t x86;
+  uint64_t x87;
+  uint64_t x88;
+  uint64_t x89;
+  uint64_t x90;
+  uint64_t x91;
+  uint64_t x92;
+  uint64_t x93;
+  uint64_t x94;
+  uint64_t x95;
+  uint64_t x96;
+  uint64_t x97;
+  uint64_t x98;
+  uint64_t x99;
+  uint64_t x100;
+  uint64_t x101;
+  uint64_t x102;
+  uint32_t x103;
+  uint64_t x104;
+  uint64_t x105;
+  uint64_t x106;
+  uint64_t x107;
+  uint64_t x108;
+  uint64_t x109;
+  uint64_t x110;
+  uint64_t x111;
+  uint64_t x112;
+  uint64_t x113;
+  uint64_t x114;
+  uint32_t x115;
+  uint64_t x116;
+  uint64_t x117;
+  uint32_t x118;
+  uint64_t x119;
+  uint64_t x120;
+  uint32_t x121;
+  uint64_t x122;
+  uint64_t x123;
+  uint32_t x124;
+  uint64_t x125;
+  uint64_t x126;
+  uint32_t x127;
+  uint64_t x128;
+  uint64_t x129;
+  uint32_t x130;
+  uint64_t x131;
+  uint64_t x132;
+  uint32_t x133;
+  uint64_t x134;
+  uint64_t x135;
+  uint32_t x136;
+  uint64_t x137;
+  uint64_t x138;
+  uint32_t x139;
+  uint64_t x140;
+  uint64_t x141;
+  uint32_t x142;
+  uint32_t x143;
+  uint32_t x144;
+  fiat_25519_uint1 x145;
+  uint32_t x146;
+  uint32_t x147;
+  x1 = ((uint64_t)(arg1[9]) * ((arg2[9]) * UINT8_C(0x26)));
+  x2 = ((uint64_t)(arg1[9]) * ((arg2[8]) * UINT8_C(0x13)));
+  x3 = ((uint64_t)(arg1[9]) * ((arg2[7]) * UINT8_C(0x26)));
+  x4 = ((uint64_t)(arg1[9]) * ((arg2[6]) * UINT8_C(0x13)));
+  x5 = ((uint64_t)(arg1[9]) * ((arg2[5]) * UINT8_C(0x26)));
+  x6 = ((uint64_t)(arg1[9]) * ((arg2[4]) * UINT8_C(0x13)));
+  x7 = ((uint64_t)(arg1[9]) * ((arg2[3]) * UINT8_C(0x26)));
+  x8 = ((uint64_t)(arg1[9]) * ((arg2[2]) * UINT8_C(0x13)));
+  x9 = ((uint64_t)(arg1[9]) * ((arg2[1]) * UINT8_C(0x26)));
+  x10 = ((uint64_t)(arg1[8]) * ((arg2[9]) * UINT8_C(0x13)));
+  x11 = ((uint64_t)(arg1[8]) * ((arg2[8]) * UINT8_C(0x13)));
+  x12 = ((uint64_t)(arg1[8]) * ((arg2[7]) * UINT8_C(0x13)));
+  x13 = ((uint64_t)(arg1[8]) * ((arg2[6]) * UINT8_C(0x13)));
+  x14 = ((uint64_t)(arg1[8]) * ((arg2[5]) * UINT8_C(0x13)));
+  x15 = ((uint64_t)(arg1[8]) * ((arg2[4]) * UINT8_C(0x13)));
+  x16 = ((uint64_t)(arg1[8]) * ((arg2[3]) * UINT8_C(0x13)));
+  x17 = ((uint64_t)(arg1[8]) * ((arg2[2]) * UINT8_C(0x13)));
+  x18 = ((uint64_t)(arg1[7]) * ((arg2[9]) * UINT8_C(0x26)));
+  x19 = ((uint64_t)(arg1[7]) * ((arg2[8]) * UINT8_C(0x13)));
+  x20 = ((uint64_t)(arg1[7]) * ((arg2[7]) * UINT8_C(0x26)));
+  x21 = ((uint64_t)(arg1[7]) * ((arg2[6]) * UINT8_C(0x13)));
+  x22 = ((uint64_t)(arg1[7]) * ((arg2[5]) * UINT8_C(0x26)));
+  x23 = ((uint64_t)(arg1[7]) * ((arg2[4]) * UINT8_C(0x13)));
+  x24 = ((uint64_t)(arg1[7]) * ((arg2[3]) * UINT8_C(0x26)));
+  x25 = ((uint64_t)(arg1[6]) * ((arg2[9]) * UINT8_C(0x13)));
+  x26 = ((uint64_t)(arg1[6]) * ((arg2[8]) * UINT8_C(0x13)));
+  x27 = ((uint64_t)(arg1[6]) * ((arg2[7]) * UINT8_C(0x13)));
+  x28 = ((uint64_t)(arg1[6]) * ((arg2[6]) * UINT8_C(0x13)));
+  x29 = ((uint64_t)(arg1[6]) * ((arg2[5]) * UINT8_C(0x13)));
+  x30 = ((uint64_t)(arg1[6]) * ((arg2[4]) * UINT8_C(0x13)));
+  x31 = ((uint64_t)(arg1[5]) * ((arg2[9]) * UINT8_C(0x26)));
+  x32 = ((uint64_t)(arg1[5]) * ((arg2[8]) * UINT8_C(0x13)));
+  x33 = ((uint64_t)(arg1[5]) * ((arg2[7]) * UINT8_C(0x26)));
+  x34 = ((uint64_t)(arg1[5]) * ((arg2[6]) * UINT8_C(0x13)));
+  x35 = ((uint64_t)(arg1[5]) * ((arg2[5]) * UINT8_C(0x26)));
+  x36 = ((uint64_t)(arg1[4]) * ((arg2[9]) * UINT8_C(0x13)));
+  x37 = ((uint64_t)(arg1[4]) * ((arg2[8]) * UINT8_C(0x13)));
+  x38 = ((uint64_t)(arg1[4]) * ((arg2[7]) * UINT8_C(0x13)));
+  x39 = ((uint64_t)(arg1[4]) * ((arg2[6]) * UINT8_C(0x13)));
+  x40 = ((uint64_t)(arg1[3]) * ((arg2[9]) * UINT8_C(0x26)));
+  x41 = ((uint64_t)(arg1[3]) * ((arg2[8]) * UINT8_C(0x13)));
+  x42 = ((uint64_t)(arg1[3]) * ((arg2[7]) * UINT8_C(0x26)));
+  x43 = ((uint64_t)(arg1[2]) * ((arg2[9]) * UINT8_C(0x13)));
+  x44 = ((uint64_t)(arg1[2]) * ((arg2[8]) * UINT8_C(0x13)));
+  x45 = ((uint64_t)(arg1[1]) * ((arg2[9]) * UINT8_C(0x26)));
+  x46 = ((uint64_t)(arg1[9]) * (arg2[0]));
+  x47 = ((uint64_t)(arg1[8]) * (arg2[1]));
+  x48 = ((uint64_t)(arg1[8]) * (arg2[0]));
+  x49 = ((uint64_t)(arg1[7]) * (arg2[2]));
+  x50 = ((uint64_t)(arg1[7]) * ((arg2[1]) * 0x2));
+  x51 = ((uint64_t)(arg1[7]) * (arg2[0]));
+  x52 = ((uint64_t)(arg1[6]) * (arg2[3]));
+  x53 = ((uint64_t)(arg1[6]) * (arg2[2]));
+  x54 = ((uint64_t)(arg1[6]) * (arg2[1]));
+  x55 = ((uint64_t)(arg1[6]) * (arg2[0]));
+  x56 = ((uint64_t)(arg1[5]) * (arg2[4]));
+  x57 = ((uint64_t)(arg1[5]) * ((arg2[3]) * 0x2));
+  x58 = ((uint64_t)(arg1[5]) * (arg2[2]));
+  x59 = ((uint64_t)(arg1[5]) * ((arg2[1]) * 0x2));
+  x60 = ((uint64_t)(arg1[5]) * (arg2[0]));
+  x61 = ((uint64_t)(arg1[4]) * (arg2[5]));
+  x62 = ((uint64_t)(arg1[4]) * (arg2[4]));
+  x63 = ((uint64_t)(arg1[4]) * (arg2[3]));
+  x64 = ((uint64_t)(arg1[4]) * (arg2[2]));
+  x65 = ((uint64_t)(arg1[4]) * (arg2[1]));
+  x66 = ((uint64_t)(arg1[4]) * (arg2[0]));
+  x67 = ((uint64_t)(arg1[3]) * (arg2[6]));
+  x68 = ((uint64_t)(arg1[3]) * ((arg2[5]) * 0x2));
+  x69 = ((uint64_t)(arg1[3]) * (arg2[4]));
+  x70 = ((uint64_t)(arg1[3]) * ((arg2[3]) * 0x2));
+  x71 = ((uint64_t)(arg1[3]) * (arg2[2]));
+  x72 = ((uint64_t)(arg1[3]) * ((arg2[1]) * 0x2));
+  x73 = ((uint64_t)(arg1[3]) * (arg2[0]));
+  x74 = ((uint64_t)(arg1[2]) * (arg2[7]));
+  x75 = ((uint64_t)(arg1[2]) * (arg2[6]));
+  x76 = ((uint64_t)(arg1[2]) * (arg2[5]));
+  x77 = ((uint64_t)(arg1[2]) * (arg2[4]));
+  x78 = ((uint64_t)(arg1[2]) * (arg2[3]));
+  x79 = ((uint64_t)(arg1[2]) * (arg2[2]));
+  x80 = ((uint64_t)(arg1[2]) * (arg2[1]));
+  x81 = ((uint64_t)(arg1[2]) * (arg2[0]));
+  x82 = ((uint64_t)(arg1[1]) * (arg2[8]));
+  x83 = ((uint64_t)(arg1[1]) * ((arg2[7]) * 0x2));
+  x84 = ((uint64_t)(arg1[1]) * (arg2[6]));
+  x85 = ((uint64_t)(arg1[1]) * ((arg2[5]) * 0x2));
+  x86 = ((uint64_t)(arg1[1]) * (arg2[4]));
+  x87 = ((uint64_t)(arg1[1]) * ((arg2[3]) * 0x2));
+  x88 = ((uint64_t)(arg1[1]) * (arg2[2]));
+  x89 = ((uint64_t)(arg1[1]) * ((arg2[1]) * 0x2));
+  x90 = ((uint64_t)(arg1[1]) * (arg2[0]));
+  x91 = ((uint64_t)(arg1[0]) * (arg2[9]));
+  x92 = ((uint64_t)(arg1[0]) * (arg2[8]));
+  x93 = ((uint64_t)(arg1[0]) * (arg2[7]));
+  x94 = ((uint64_t)(arg1[0]) * (arg2[6]));
+  x95 = ((uint64_t)(arg1[0]) * (arg2[5]));
+  x96 = ((uint64_t)(arg1[0]) * (arg2[4]));
+  x97 = ((uint64_t)(arg1[0]) * (arg2[3]));
+  x98 = ((uint64_t)(arg1[0]) * (arg2[2]));
+  x99 = ((uint64_t)(arg1[0]) * (arg2[1]));
+  x100 = ((uint64_t)(arg1[0]) * (arg2[0]));
+  x101 = (x100 + (x45 + (x44 + (x42 + (x39 + (x35 + (x30 + (x24 + (x17 + x9)))))))));
+  x102 = (x101 >> 26);
+  x103 = (uint32_t)(x101 & UINT32_C(0x3ffffff));
+  x104 = (x91 + (x82 + (x74 + (x67 + (x61 + (x56 + (x52 + (x49 + (x47 + x46)))))))));
+  x105 = (x92 + (x83 + (x75 + (x68 + (x62 + (x57 + (x53 + (x50 + (x48 + x1)))))))));
+  x106 = (x93 + (x84 + (x76 + (x69 + (x63 + (x58 + (x54 + (x51 + (x10 + x2)))))))));
+  x107 = (x94 + (x85 + (x77 + (x70 + (x64 + (x59 + (x55 + (x18 + (x11 + x3)))))))));
+  x108 = (x95 + (x86 + (x78 + (x71 + (x65 + (x60 + (x25 + (x19 + (x12 + x4)))))))));
+  x109 = (x96 + (x87 + (x79 + (x72 + (x66 + (x31 + (x26 + (x20 + (x13 + x5)))))))));
+  x110 = (x97 + (x88 + (x80 + (x73 + (x36 + (x32 + (x27 + (x21 + (x14 + x6)))))))));
+  x111 = (x98 + (x89 + (x81 + (x40 + (x37 + (x33 + (x28 + (x22 + (x15 + x7)))))))));
+  x112 = (x99 + (x90 + (x43 + (x41 + (x38 + (x34 + (x29 + (x23 + (x16 + x8)))))))));
+  x113 = (x102 + x112);
+  x114 = (x113 >> 25);
+  x115 = (uint32_t)(x113 & UINT32_C(0x1ffffff));
+  x116 = (x114 + x111);
+  x117 = (x116 >> 26);
+  x118 = (uint32_t)(x116 & UINT32_C(0x3ffffff));
+  x119 = (x117 + x110);
+  x120 = (x119 >> 25);
+  x121 = (uint32_t)(x119 & UINT32_C(0x1ffffff));
+  x122 = (x120 + x109);
+  x123 = (x122 >> 26);
+  x124 = (uint32_t)(x122 & UINT32_C(0x3ffffff));
+  x125 = (x123 + x108);
+  x126 = (x125 >> 25);
+  x127 = (uint32_t)(x125 & UINT32_C(0x1ffffff));
+  x128 = (x126 + x107);
+  x129 = (x128 >> 26);
+  x130 = (uint32_t)(x128 & UINT32_C(0x3ffffff));
+  x131 = (x129 + x106);
+  x132 = (x131 >> 25);
+  x133 = (uint32_t)(x131 & UINT32_C(0x1ffffff));
+  x134 = (x132 + x105);
+  x135 = (x134 >> 26);
+  x136 = (uint32_t)(x134 & UINT32_C(0x3ffffff));
+  x137 = (x135 + x104);
+  x138 = (x137 >> 25);
+  x139 = (uint32_t)(x137 & UINT32_C(0x1ffffff));
+  x140 = (x138 * UINT8_C(0x13));
+  x141 = (x103 + x140);
+  x142 = (uint32_t)(x141 >> 26);
+  x143 = (uint32_t)(x141 & UINT32_C(0x3ffffff));
+  x144 = (x142 + x115);
+  x145 = (fiat_25519_uint1)(x144 >> 25);
+  x146 = (x144 & UINT32_C(0x1ffffff));
+  x147 = (x145 + x118);
   out1[0] = x143;
   out1[1] = x146;
   out1[2] = x147;
@@ -303,135 +486,252 @@
 
 /*
  * The function fiat_25519_carry_square squares a field element and reduces the result.
+ *
  * Postconditions:
  *   eval out1 mod m = (eval arg1 * eval arg1) mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
  */
-static void fiat_25519_carry_square(uint32_t out1[10], const uint32_t arg1[10]) {
-  uint32_t x1 = ((arg1[9]) * UINT8_C(0x13));
-  uint32_t x2 = (x1 * 0x2);
-  uint32_t x3 = ((arg1[9]) * 0x2);
-  uint32_t x4 = ((arg1[8]) * UINT8_C(0x13));
-  uint64_t x5 = ((uint64_t)x4 * 0x2);
-  uint32_t x6 = ((arg1[8]) * 0x2);
-  uint32_t x7 = ((arg1[7]) * UINT8_C(0x13));
-  uint32_t x8 = (x7 * 0x2);
-  uint32_t x9 = ((arg1[7]) * 0x2);
-  uint32_t x10 = ((arg1[6]) * UINT8_C(0x13));
-  uint64_t x11 = ((uint64_t)x10 * 0x2);
-  uint32_t x12 = ((arg1[6]) * 0x2);
-  uint32_t x13 = ((arg1[5]) * UINT8_C(0x13));
-  uint32_t x14 = ((arg1[5]) * 0x2);
-  uint32_t x15 = ((arg1[4]) * 0x2);
-  uint32_t x16 = ((arg1[3]) * 0x2);
-  uint32_t x17 = ((arg1[2]) * 0x2);
-  uint32_t x18 = ((arg1[1]) * 0x2);
-  uint64_t x19 = ((uint64_t)(arg1[9]) * (x1 * 0x2));
-  uint64_t x20 = ((uint64_t)(arg1[8]) * x2);
-  uint64_t x21 = ((uint64_t)(arg1[8]) * x4);
-  uint64_t x22 = ((arg1[7]) * ((uint64_t)x2 * 0x2));
-  uint64_t x23 = ((arg1[7]) * x5);
-  uint64_t x24 = ((uint64_t)(arg1[7]) * (x7 * 0x2));
-  uint64_t x25 = ((uint64_t)(arg1[6]) * x2);
-  uint64_t x26 = ((arg1[6]) * x5);
-  uint64_t x27 = ((uint64_t)(arg1[6]) * x8);
-  uint64_t x28 = ((uint64_t)(arg1[6]) * x10);
-  uint64_t x29 = ((arg1[5]) * ((uint64_t)x2 * 0x2));
-  uint64_t x30 = ((arg1[5]) * x5);
-  uint64_t x31 = ((arg1[5]) * ((uint64_t)x8 * 0x2));
-  uint64_t x32 = ((arg1[5]) * x11);
-  uint64_t x33 = ((uint64_t)(arg1[5]) * (x13 * 0x2));
-  uint64_t x34 = ((uint64_t)(arg1[4]) * x2);
-  uint64_t x35 = ((arg1[4]) * x5);
-  uint64_t x36 = ((uint64_t)(arg1[4]) * x8);
-  uint64_t x37 = ((arg1[4]) * x11);
-  uint64_t x38 = ((uint64_t)(arg1[4]) * x14);
-  uint64_t x39 = ((uint64_t)(arg1[4]) * (arg1[4]));
-  uint64_t x40 = ((arg1[3]) * ((uint64_t)x2 * 0x2));
-  uint64_t x41 = ((arg1[3]) * x5);
-  uint64_t x42 = ((arg1[3]) * ((uint64_t)x8 * 0x2));
-  uint64_t x43 = ((uint64_t)(arg1[3]) * x12);
-  uint64_t x44 = ((uint64_t)(arg1[3]) * (x14 * 0x2));
-  uint64_t x45 = ((uint64_t)(arg1[3]) * x15);
-  uint64_t x46 = ((uint64_t)(arg1[3]) * ((arg1[3]) * 0x2));
-  uint64_t x47 = ((uint64_t)(arg1[2]) * x2);
-  uint64_t x48 = ((arg1[2]) * x5);
-  uint64_t x49 = ((uint64_t)(arg1[2]) * x9);
-  uint64_t x50 = ((uint64_t)(arg1[2]) * x12);
-  uint64_t x51 = ((uint64_t)(arg1[2]) * x14);
-  uint64_t x52 = ((uint64_t)(arg1[2]) * x15);
-  uint64_t x53 = ((uint64_t)(arg1[2]) * x16);
-  uint64_t x54 = ((uint64_t)(arg1[2]) * (arg1[2]));
-  uint64_t x55 = ((arg1[1]) * ((uint64_t)x2 * 0x2));
-  uint64_t x56 = ((uint64_t)(arg1[1]) * x6);
-  uint64_t x57 = ((uint64_t)(arg1[1]) * (x9 * 0x2));
-  uint64_t x58 = ((uint64_t)(arg1[1]) * x12);
-  uint64_t x59 = ((uint64_t)(arg1[1]) * (x14 * 0x2));
-  uint64_t x60 = ((uint64_t)(arg1[1]) * x15);
-  uint64_t x61 = ((uint64_t)(arg1[1]) * (x16 * 0x2));
-  uint64_t x62 = ((uint64_t)(arg1[1]) * x17);
-  uint64_t x63 = ((uint64_t)(arg1[1]) * ((arg1[1]) * 0x2));
-  uint64_t x64 = ((uint64_t)(arg1[0]) * x3);
-  uint64_t x65 = ((uint64_t)(arg1[0]) * x6);
-  uint64_t x66 = ((uint64_t)(arg1[0]) * x9);
-  uint64_t x67 = ((uint64_t)(arg1[0]) * x12);
-  uint64_t x68 = ((uint64_t)(arg1[0]) * x14);
-  uint64_t x69 = ((uint64_t)(arg1[0]) * x15);
-  uint64_t x70 = ((uint64_t)(arg1[0]) * x16);
-  uint64_t x71 = ((uint64_t)(arg1[0]) * x17);
-  uint64_t x72 = ((uint64_t)(arg1[0]) * x18);
-  uint64_t x73 = ((uint64_t)(arg1[0]) * (arg1[0]));
-  uint64_t x74 = (x73 + (x55 + (x48 + (x42 + (x37 + x33)))));
-  uint64_t x75 = (x74 >> 26);
-  uint32_t x76 = (uint32_t)(x74 & UINT32_C(0x3ffffff));
-  uint64_t x77 = (x64 + (x56 + (x49 + (x43 + x38))));
-  uint64_t x78 = (x65 + (x57 + (x50 + (x44 + (x39 + x19)))));
-  uint64_t x79 = (x66 + (x58 + (x51 + (x45 + x20))));
-  uint64_t x80 = (x67 + (x59 + (x52 + (x46 + (x22 + x21)))));
-  uint64_t x81 = (x68 + (x60 + (x53 + (x25 + x23))));
-  uint64_t x82 = (x69 + (x61 + (x54 + (x29 + (x26 + x24)))));
-  uint64_t x83 = (x70 + (x62 + (x34 + (x30 + x27))));
-  uint64_t x84 = (x71 + (x63 + (x40 + (x35 + (x31 + x28)))));
-  uint64_t x85 = (x72 + (x47 + (x41 + (x36 + x32))));
-  uint64_t x86 = (x75 + x85);
-  uint64_t x87 = (x86 >> 25);
-  uint32_t x88 = (uint32_t)(x86 & UINT32_C(0x1ffffff));
-  uint64_t x89 = (x87 + x84);
-  uint64_t x90 = (x89 >> 26);
-  uint32_t x91 = (uint32_t)(x89 & UINT32_C(0x3ffffff));
-  uint64_t x92 = (x90 + x83);
-  uint64_t x93 = (x92 >> 25);
-  uint32_t x94 = (uint32_t)(x92 & UINT32_C(0x1ffffff));
-  uint64_t x95 = (x93 + x82);
-  uint64_t x96 = (x95 >> 26);
-  uint32_t x97 = (uint32_t)(x95 & UINT32_C(0x3ffffff));
-  uint64_t x98 = (x96 + x81);
-  uint64_t x99 = (x98 >> 25);
-  uint32_t x100 = (uint32_t)(x98 & UINT32_C(0x1ffffff));
-  uint64_t x101 = (x99 + x80);
-  uint64_t x102 = (x101 >> 26);
-  uint32_t x103 = (uint32_t)(x101 & UINT32_C(0x3ffffff));
-  uint64_t x104 = (x102 + x79);
-  uint64_t x105 = (x104 >> 25);
-  uint32_t x106 = (uint32_t)(x104 & UINT32_C(0x1ffffff));
-  uint64_t x107 = (x105 + x78);
-  uint64_t x108 = (x107 >> 26);
-  uint32_t x109 = (uint32_t)(x107 & UINT32_C(0x3ffffff));
-  uint64_t x110 = (x108 + x77);
-  uint64_t x111 = (x110 >> 25);
-  uint32_t x112 = (uint32_t)(x110 & UINT32_C(0x1ffffff));
-  uint64_t x113 = (x111 * UINT8_C(0x13));
-  uint64_t x114 = (x76 + x113);
-  uint32_t x115 = (uint32_t)(x114 >> 26);
-  uint32_t x116 = (uint32_t)(x114 & UINT32_C(0x3ffffff));
-  uint32_t x117 = (x115 + x88);
-  fiat_25519_uint1 x118 = (fiat_25519_uint1)(x117 >> 25);
-  uint32_t x119 = (x117 & UINT32_C(0x1ffffff));
-  uint32_t x120 = (x118 + x91);
+static FIAT_25519_FIAT_INLINE void fiat_25519_carry_square(fiat_25519_tight_field_element out1, const fiat_25519_loose_field_element arg1) {
+  uint32_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  uint32_t x4;
+  uint64_t x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint32_t x8;
+  uint32_t x9;
+  uint32_t x10;
+  uint64_t x11;
+  uint32_t x12;
+  uint32_t x13;
+  uint32_t x14;
+  uint32_t x15;
+  uint32_t x16;
+  uint32_t x17;
+  uint32_t x18;
+  uint64_t x19;
+  uint64_t x20;
+  uint64_t x21;
+  uint64_t x22;
+  uint64_t x23;
+  uint64_t x24;
+  uint64_t x25;
+  uint64_t x26;
+  uint64_t x27;
+  uint64_t x28;
+  uint64_t x29;
+  uint64_t x30;
+  uint64_t x31;
+  uint64_t x32;
+  uint64_t x33;
+  uint64_t x34;
+  uint64_t x35;
+  uint64_t x36;
+  uint64_t x37;
+  uint64_t x38;
+  uint64_t x39;
+  uint64_t x40;
+  uint64_t x41;
+  uint64_t x42;
+  uint64_t x43;
+  uint64_t x44;
+  uint64_t x45;
+  uint64_t x46;
+  uint64_t x47;
+  uint64_t x48;
+  uint64_t x49;
+  uint64_t x50;
+  uint64_t x51;
+  uint64_t x52;
+  uint64_t x53;
+  uint64_t x54;
+  uint64_t x55;
+  uint64_t x56;
+  uint64_t x57;
+  uint64_t x58;
+  uint64_t x59;
+  uint64_t x60;
+  uint64_t x61;
+  uint64_t x62;
+  uint64_t x63;
+  uint64_t x64;
+  uint64_t x65;
+  uint64_t x66;
+  uint64_t x67;
+  uint64_t x68;
+  uint64_t x69;
+  uint64_t x70;
+  uint64_t x71;
+  uint64_t x72;
+  uint64_t x73;
+  uint64_t x74;
+  uint64_t x75;
+  uint32_t x76;
+  uint64_t x77;
+  uint64_t x78;
+  uint64_t x79;
+  uint64_t x80;
+  uint64_t x81;
+  uint64_t x82;
+  uint64_t x83;
+  uint64_t x84;
+  uint64_t x85;
+  uint64_t x86;
+  uint64_t x87;
+  uint32_t x88;
+  uint64_t x89;
+  uint64_t x90;
+  uint32_t x91;
+  uint64_t x92;
+  uint64_t x93;
+  uint32_t x94;
+  uint64_t x95;
+  uint64_t x96;
+  uint32_t x97;
+  uint64_t x98;
+  uint64_t x99;
+  uint32_t x100;
+  uint64_t x101;
+  uint64_t x102;
+  uint32_t x103;
+  uint64_t x104;
+  uint64_t x105;
+  uint32_t x106;
+  uint64_t x107;
+  uint64_t x108;
+  uint32_t x109;
+  uint64_t x110;
+  uint64_t x111;
+  uint32_t x112;
+  uint64_t x113;
+  uint64_t x114;
+  uint32_t x115;
+  uint32_t x116;
+  uint32_t x117;
+  fiat_25519_uint1 x118;
+  uint32_t x119;
+  uint32_t x120;
+  x1 = ((arg1[9]) * UINT8_C(0x13));
+  x2 = (x1 * 0x2);
+  x3 = ((arg1[9]) * 0x2);
+  x4 = ((arg1[8]) * UINT8_C(0x13));
+  x5 = ((uint64_t)x4 * 0x2);
+  x6 = ((arg1[8]) * 0x2);
+  x7 = ((arg1[7]) * UINT8_C(0x13));
+  x8 = (x7 * 0x2);
+  x9 = ((arg1[7]) * 0x2);
+  x10 = ((arg1[6]) * UINT8_C(0x13));
+  x11 = ((uint64_t)x10 * 0x2);
+  x12 = ((arg1[6]) * 0x2);
+  x13 = ((arg1[5]) * UINT8_C(0x13));
+  x14 = ((arg1[5]) * 0x2);
+  x15 = ((arg1[4]) * 0x2);
+  x16 = ((arg1[3]) * 0x2);
+  x17 = ((arg1[2]) * 0x2);
+  x18 = ((arg1[1]) * 0x2);
+  x19 = ((uint64_t)(arg1[9]) * (x1 * 0x2));
+  x20 = ((uint64_t)(arg1[8]) * x2);
+  x21 = ((uint64_t)(arg1[8]) * x4);
+  x22 = ((arg1[7]) * ((uint64_t)x2 * 0x2));
+  x23 = ((arg1[7]) * x5);
+  x24 = ((uint64_t)(arg1[7]) * (x7 * 0x2));
+  x25 = ((uint64_t)(arg1[6]) * x2);
+  x26 = ((arg1[6]) * x5);
+  x27 = ((uint64_t)(arg1[6]) * x8);
+  x28 = ((uint64_t)(arg1[6]) * x10);
+  x29 = ((arg1[5]) * ((uint64_t)x2 * 0x2));
+  x30 = ((arg1[5]) * x5);
+  x31 = ((arg1[5]) * ((uint64_t)x8 * 0x2));
+  x32 = ((arg1[5]) * x11);
+  x33 = ((uint64_t)(arg1[5]) * (x13 * 0x2));
+  x34 = ((uint64_t)(arg1[4]) * x2);
+  x35 = ((arg1[4]) * x5);
+  x36 = ((uint64_t)(arg1[4]) * x8);
+  x37 = ((arg1[4]) * x11);
+  x38 = ((uint64_t)(arg1[4]) * x14);
+  x39 = ((uint64_t)(arg1[4]) * (arg1[4]));
+  x40 = ((arg1[3]) * ((uint64_t)x2 * 0x2));
+  x41 = ((arg1[3]) * x5);
+  x42 = ((arg1[3]) * ((uint64_t)x8 * 0x2));
+  x43 = ((uint64_t)(arg1[3]) * x12);
+  x44 = ((uint64_t)(arg1[3]) * (x14 * 0x2));
+  x45 = ((uint64_t)(arg1[3]) * x15);
+  x46 = ((uint64_t)(arg1[3]) * ((arg1[3]) * 0x2));
+  x47 = ((uint64_t)(arg1[2]) * x2);
+  x48 = ((arg1[2]) * x5);
+  x49 = ((uint64_t)(arg1[2]) * x9);
+  x50 = ((uint64_t)(arg1[2]) * x12);
+  x51 = ((uint64_t)(arg1[2]) * x14);
+  x52 = ((uint64_t)(arg1[2]) * x15);
+  x53 = ((uint64_t)(arg1[2]) * x16);
+  x54 = ((uint64_t)(arg1[2]) * (arg1[2]));
+  x55 = ((arg1[1]) * ((uint64_t)x2 * 0x2));
+  x56 = ((uint64_t)(arg1[1]) * x6);
+  x57 = ((uint64_t)(arg1[1]) * (x9 * 0x2));
+  x58 = ((uint64_t)(arg1[1]) * x12);
+  x59 = ((uint64_t)(arg1[1]) * (x14 * 0x2));
+  x60 = ((uint64_t)(arg1[1]) * x15);
+  x61 = ((uint64_t)(arg1[1]) * (x16 * 0x2));
+  x62 = ((uint64_t)(arg1[1]) * x17);
+  x63 = ((uint64_t)(arg1[1]) * ((arg1[1]) * 0x2));
+  x64 = ((uint64_t)(arg1[0]) * x3);
+  x65 = ((uint64_t)(arg1[0]) * x6);
+  x66 = ((uint64_t)(arg1[0]) * x9);
+  x67 = ((uint64_t)(arg1[0]) * x12);
+  x68 = ((uint64_t)(arg1[0]) * x14);
+  x69 = ((uint64_t)(arg1[0]) * x15);
+  x70 = ((uint64_t)(arg1[0]) * x16);
+  x71 = ((uint64_t)(arg1[0]) * x17);
+  x72 = ((uint64_t)(arg1[0]) * x18);
+  x73 = ((uint64_t)(arg1[0]) * (arg1[0]));
+  x74 = (x73 + (x55 + (x48 + (x42 + (x37 + x33)))));
+  x75 = (x74 >> 26);
+  x76 = (uint32_t)(x74 & UINT32_C(0x3ffffff));
+  x77 = (x64 + (x56 + (x49 + (x43 + x38))));
+  x78 = (x65 + (x57 + (x50 + (x44 + (x39 + x19)))));
+  x79 = (x66 + (x58 + (x51 + (x45 + x20))));
+  x80 = (x67 + (x59 + (x52 + (x46 + (x22 + x21)))));
+  x81 = (x68 + (x60 + (x53 + (x25 + x23))));
+  x82 = (x69 + (x61 + (x54 + (x29 + (x26 + x24)))));
+  x83 = (x70 + (x62 + (x34 + (x30 + x27))));
+  x84 = (x71 + (x63 + (x40 + (x35 + (x31 + x28)))));
+  x85 = (x72 + (x47 + (x41 + (x36 + x32))));
+  x86 = (x75 + x85);
+  x87 = (x86 >> 25);
+  x88 = (uint32_t)(x86 & UINT32_C(0x1ffffff));
+  x89 = (x87 + x84);
+  x90 = (x89 >> 26);
+  x91 = (uint32_t)(x89 & UINT32_C(0x3ffffff));
+  x92 = (x90 + x83);
+  x93 = (x92 >> 25);
+  x94 = (uint32_t)(x92 & UINT32_C(0x1ffffff));
+  x95 = (x93 + x82);
+  x96 = (x95 >> 26);
+  x97 = (uint32_t)(x95 & UINT32_C(0x3ffffff));
+  x98 = (x96 + x81);
+  x99 = (x98 >> 25);
+  x100 = (uint32_t)(x98 & UINT32_C(0x1ffffff));
+  x101 = (x99 + x80);
+  x102 = (x101 >> 26);
+  x103 = (uint32_t)(x101 & UINT32_C(0x3ffffff));
+  x104 = (x102 + x79);
+  x105 = (x104 >> 25);
+  x106 = (uint32_t)(x104 & UINT32_C(0x1ffffff));
+  x107 = (x105 + x78);
+  x108 = (x107 >> 26);
+  x109 = (uint32_t)(x107 & UINT32_C(0x3ffffff));
+  x110 = (x108 + x77);
+  x111 = (x110 >> 25);
+  x112 = (uint32_t)(x110 & UINT32_C(0x1ffffff));
+  x113 = (x111 * UINT8_C(0x13));
+  x114 = (x76 + x113);
+  x115 = (uint32_t)(x114 >> 26);
+  x116 = (uint32_t)(x114 & UINT32_C(0x3ffffff));
+  x117 = (x115 + x88);
+  x118 = (fiat_25519_uint1)(x117 >> 25);
+  x119 = (x117 & UINT32_C(0x1ffffff));
+  x120 = (x118 + x91);
   out1[0] = x116;
   out1[1] = x119;
   out1[2] = x120;
@@ -446,37 +746,56 @@
 
 /*
  * The function fiat_25519_carry reduces a field element.
+ *
  * Postconditions:
  *   eval out1 mod m = eval arg1 mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
  */
-static void fiat_25519_carry(uint32_t out1[10], const uint32_t arg1[10]) {
-  uint32_t x1 = (arg1[0]);
-  uint32_t x2 = ((x1 >> 26) + (arg1[1]));
-  uint32_t x3 = ((x2 >> 25) + (arg1[2]));
-  uint32_t x4 = ((x3 >> 26) + (arg1[3]));
-  uint32_t x5 = ((x4 >> 25) + (arg1[4]));
-  uint32_t x6 = ((x5 >> 26) + (arg1[5]));
-  uint32_t x7 = ((x6 >> 25) + (arg1[6]));
-  uint32_t x8 = ((x7 >> 26) + (arg1[7]));
-  uint32_t x9 = ((x8 >> 25) + (arg1[8]));
-  uint32_t x10 = ((x9 >> 26) + (arg1[9]));
-  uint32_t x11 = ((x1 & UINT32_C(0x3ffffff)) + ((x10 >> 25) * UINT8_C(0x13)));
-  uint32_t x12 = ((fiat_25519_uint1)(x11 >> 26) + (x2 & UINT32_C(0x1ffffff)));
-  uint32_t x13 = (x11 & UINT32_C(0x3ffffff));
-  uint32_t x14 = (x12 & UINT32_C(0x1ffffff));
-  uint32_t x15 = ((fiat_25519_uint1)(x12 >> 25) + (x3 & UINT32_C(0x3ffffff)));
-  uint32_t x16 = (x4 & UINT32_C(0x1ffffff));
-  uint32_t x17 = (x5 & UINT32_C(0x3ffffff));
-  uint32_t x18 = (x6 & UINT32_C(0x1ffffff));
-  uint32_t x19 = (x7 & UINT32_C(0x3ffffff));
-  uint32_t x20 = (x8 & UINT32_C(0x1ffffff));
-  uint32_t x21 = (x9 & UINT32_C(0x3ffffff));
-  uint32_t x22 = (x10 & UINT32_C(0x1ffffff));
+static FIAT_25519_FIAT_INLINE void fiat_25519_carry(fiat_25519_tight_field_element out1, const fiat_25519_loose_field_element arg1) {
+  uint32_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  uint32_t x4;
+  uint32_t x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint32_t x8;
+  uint32_t x9;
+  uint32_t x10;
+  uint32_t x11;
+  uint32_t x12;
+  uint32_t x13;
+  uint32_t x14;
+  uint32_t x15;
+  uint32_t x16;
+  uint32_t x17;
+  uint32_t x18;
+  uint32_t x19;
+  uint32_t x20;
+  uint32_t x21;
+  uint32_t x22;
+  x1 = (arg1[0]);
+  x2 = ((x1 >> 26) + (arg1[1]));
+  x3 = ((x2 >> 25) + (arg1[2]));
+  x4 = ((x3 >> 26) + (arg1[3]));
+  x5 = ((x4 >> 25) + (arg1[4]));
+  x6 = ((x5 >> 26) + (arg1[5]));
+  x7 = ((x6 >> 25) + (arg1[6]));
+  x8 = ((x7 >> 26) + (arg1[7]));
+  x9 = ((x8 >> 25) + (arg1[8]));
+  x10 = ((x9 >> 26) + (arg1[9]));
+  x11 = ((x1 & UINT32_C(0x3ffffff)) + ((x10 >> 25) * UINT8_C(0x13)));
+  x12 = ((fiat_25519_uint1)(x11 >> 26) + (x2 & UINT32_C(0x1ffffff)));
+  x13 = (x11 & UINT32_C(0x3ffffff));
+  x14 = (x12 & UINT32_C(0x1ffffff));
+  x15 = ((fiat_25519_uint1)(x12 >> 25) + (x3 & UINT32_C(0x3ffffff)));
+  x16 = (x4 & UINT32_C(0x1ffffff));
+  x17 = (x5 & UINT32_C(0x3ffffff));
+  x18 = (x6 & UINT32_C(0x1ffffff));
+  x19 = (x7 & UINT32_C(0x3ffffff));
+  x20 = (x8 & UINT32_C(0x1ffffff));
+  x21 = (x9 & UINT32_C(0x3ffffff));
+  x22 = (x10 & UINT32_C(0x1ffffff));
   out1[0] = x13;
   out1[1] = x14;
   out1[2] = x15;
@@ -491,26 +810,32 @@
 
 /*
  * The function fiat_25519_add adds two field elements.
+ *
  * Postconditions:
  *   eval out1 mod m = (eval arg1 + eval arg2) mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
- *   arg2: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
  */
-static void fiat_25519_add(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10]) {
-  uint32_t x1 = ((arg1[0]) + (arg2[0]));
-  uint32_t x2 = ((arg1[1]) + (arg2[1]));
-  uint32_t x3 = ((arg1[2]) + (arg2[2]));
-  uint32_t x4 = ((arg1[3]) + (arg2[3]));
-  uint32_t x5 = ((arg1[4]) + (arg2[4]));
-  uint32_t x6 = ((arg1[5]) + (arg2[5]));
-  uint32_t x7 = ((arg1[6]) + (arg2[6]));
-  uint32_t x8 = ((arg1[7]) + (arg2[7]));
-  uint32_t x9 = ((arg1[8]) + (arg2[8]));
-  uint32_t x10 = ((arg1[9]) + (arg2[9]));
+static FIAT_25519_FIAT_INLINE void fiat_25519_add(fiat_25519_loose_field_element out1, const fiat_25519_tight_field_element arg1, const fiat_25519_tight_field_element arg2) {
+  uint32_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  uint32_t x4;
+  uint32_t x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint32_t x8;
+  uint32_t x9;
+  uint32_t x10;
+  x1 = ((arg1[0]) + (arg2[0]));
+  x2 = ((arg1[1]) + (arg2[1]));
+  x3 = ((arg1[2]) + (arg2[2]));
+  x4 = ((arg1[3]) + (arg2[3]));
+  x5 = ((arg1[4]) + (arg2[4]));
+  x6 = ((arg1[5]) + (arg2[5]));
+  x7 = ((arg1[6]) + (arg2[6]));
+  x8 = ((arg1[7]) + (arg2[7]));
+  x9 = ((arg1[8]) + (arg2[8]));
+  x10 = ((arg1[9]) + (arg2[9]));
   out1[0] = x1;
   out1[1] = x2;
   out1[2] = x3;
@@ -525,26 +850,32 @@
 
 /*
  * The function fiat_25519_sub subtracts two field elements.
+ *
  * Postconditions:
  *   eval out1 mod m = (eval arg1 - eval arg2) mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
- *   arg2: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
  */
-static void fiat_25519_sub(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10]) {
-  uint32_t x1 = ((UINT32_C(0x7ffffda) + (arg1[0])) - (arg2[0]));
-  uint32_t x2 = ((UINT32_C(0x3fffffe) + (arg1[1])) - (arg2[1]));
-  uint32_t x3 = ((UINT32_C(0x7fffffe) + (arg1[2])) - (arg2[2]));
-  uint32_t x4 = ((UINT32_C(0x3fffffe) + (arg1[3])) - (arg2[3]));
-  uint32_t x5 = ((UINT32_C(0x7fffffe) + (arg1[4])) - (arg2[4]));
-  uint32_t x6 = ((UINT32_C(0x3fffffe) + (arg1[5])) - (arg2[5]));
-  uint32_t x7 = ((UINT32_C(0x7fffffe) + (arg1[6])) - (arg2[6]));
-  uint32_t x8 = ((UINT32_C(0x3fffffe) + (arg1[7])) - (arg2[7]));
-  uint32_t x9 = ((UINT32_C(0x7fffffe) + (arg1[8])) - (arg2[8]));
-  uint32_t x10 = ((UINT32_C(0x3fffffe) + (arg1[9])) - (arg2[9]));
+static FIAT_25519_FIAT_INLINE void fiat_25519_sub(fiat_25519_loose_field_element out1, const fiat_25519_tight_field_element arg1, const fiat_25519_tight_field_element arg2) {
+  uint32_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  uint32_t x4;
+  uint32_t x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint32_t x8;
+  uint32_t x9;
+  uint32_t x10;
+  x1 = ((UINT32_C(0x7ffffda) + (arg1[0])) - (arg2[0]));
+  x2 = ((UINT32_C(0x3fffffe) + (arg1[1])) - (arg2[1]));
+  x3 = ((UINT32_C(0x7fffffe) + (arg1[2])) - (arg2[2]));
+  x4 = ((UINT32_C(0x3fffffe) + (arg1[3])) - (arg2[3]));
+  x5 = ((UINT32_C(0x7fffffe) + (arg1[4])) - (arg2[4]));
+  x6 = ((UINT32_C(0x3fffffe) + (arg1[5])) - (arg2[5]));
+  x7 = ((UINT32_C(0x7fffffe) + (arg1[6])) - (arg2[6]));
+  x8 = ((UINT32_C(0x3fffffe) + (arg1[7])) - (arg2[7]));
+  x9 = ((UINT32_C(0x7fffffe) + (arg1[8])) - (arg2[8]));
+  x10 = ((UINT32_C(0x3fffffe) + (arg1[9])) - (arg2[9]));
   out1[0] = x1;
   out1[1] = x2;
   out1[2] = x3;
@@ -559,25 +890,32 @@
 
 /*
  * The function fiat_25519_opp negates a field element.
+ *
  * Postconditions:
  *   eval out1 mod m = -eval arg1 mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
  */
-static void fiat_25519_opp(uint32_t out1[10], const uint32_t arg1[10]) {
-  uint32_t x1 = (UINT32_C(0x7ffffda) - (arg1[0]));
-  uint32_t x2 = (UINT32_C(0x3fffffe) - (arg1[1]));
-  uint32_t x3 = (UINT32_C(0x7fffffe) - (arg1[2]));
-  uint32_t x4 = (UINT32_C(0x3fffffe) - (arg1[3]));
-  uint32_t x5 = (UINT32_C(0x7fffffe) - (arg1[4]));
-  uint32_t x6 = (UINT32_C(0x3fffffe) - (arg1[5]));
-  uint32_t x7 = (UINT32_C(0x7fffffe) - (arg1[6]));
-  uint32_t x8 = (UINT32_C(0x3fffffe) - (arg1[7]));
-  uint32_t x9 = (UINT32_C(0x7fffffe) - (arg1[8]));
-  uint32_t x10 = (UINT32_C(0x3fffffe) - (arg1[9]));
+static FIAT_25519_FIAT_INLINE void fiat_25519_opp(fiat_25519_loose_field_element out1, const fiat_25519_tight_field_element arg1) {
+  uint32_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  uint32_t x4;
+  uint32_t x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint32_t x8;
+  uint32_t x9;
+  uint32_t x10;
+  x1 = (UINT32_C(0x7ffffda) - (arg1[0]));
+  x2 = (UINT32_C(0x3fffffe) - (arg1[1]));
+  x3 = (UINT32_C(0x7fffffe) - (arg1[2]));
+  x4 = (UINT32_C(0x3fffffe) - (arg1[3]));
+  x5 = (UINT32_C(0x7fffffe) - (arg1[4]));
+  x6 = (UINT32_C(0x3fffffe) - (arg1[5]));
+  x7 = (UINT32_C(0x7fffffe) - (arg1[6]));
+  x8 = (UINT32_C(0x3fffffe) - (arg1[7]));
+  x9 = (UINT32_C(0x7fffffe) - (arg1[8]));
+  x10 = (UINT32_C(0x3fffffe) - (arg1[9]));
   out1[0] = x1;
   out1[1] = x2;
   out1[2] = x3;
@@ -592,6 +930,7 @@
 
 /*
  * The function fiat_25519_selectznz is a multi-limb conditional select.
+ *
  * Postconditions:
  *   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
  *
@@ -602,26 +941,26 @@
  * Output Bounds:
  *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
  */
-static void fiat_25519_selectznz(uint32_t out1[10], fiat_25519_uint1 arg1, const uint32_t arg2[10], const uint32_t arg3[10]) {
+static FIAT_25519_FIAT_INLINE void fiat_25519_selectznz(uint32_t out1[10], fiat_25519_uint1 arg1, const uint32_t arg2[10], const uint32_t arg3[10]) {
   uint32_t x1;
-  fiat_25519_cmovznz_u32(&x1, arg1, (arg2[0]), (arg3[0]));
   uint32_t x2;
-  fiat_25519_cmovznz_u32(&x2, arg1, (arg2[1]), (arg3[1]));
   uint32_t x3;
-  fiat_25519_cmovznz_u32(&x3, arg1, (arg2[2]), (arg3[2]));
   uint32_t x4;
-  fiat_25519_cmovznz_u32(&x4, arg1, (arg2[3]), (arg3[3]));
   uint32_t x5;
-  fiat_25519_cmovznz_u32(&x5, arg1, (arg2[4]), (arg3[4]));
   uint32_t x6;
-  fiat_25519_cmovznz_u32(&x6, arg1, (arg2[5]), (arg3[5]));
   uint32_t x7;
-  fiat_25519_cmovznz_u32(&x7, arg1, (arg2[6]), (arg3[6]));
   uint32_t x8;
-  fiat_25519_cmovznz_u32(&x8, arg1, (arg2[7]), (arg3[7]));
   uint32_t x9;
-  fiat_25519_cmovznz_u32(&x9, arg1, (arg2[8]), (arg3[8]));
   uint32_t x10;
+  fiat_25519_cmovznz_u32(&x1, arg1, (arg2[0]), (arg3[0]));
+  fiat_25519_cmovznz_u32(&x2, arg1, (arg2[1]), (arg3[1]));
+  fiat_25519_cmovznz_u32(&x3, arg1, (arg2[2]), (arg3[2]));
+  fiat_25519_cmovznz_u32(&x4, arg1, (arg2[3]), (arg3[3]));
+  fiat_25519_cmovznz_u32(&x5, arg1, (arg2[4]), (arg3[4]));
+  fiat_25519_cmovznz_u32(&x6, arg1, (arg2[5]), (arg3[5]));
+  fiat_25519_cmovznz_u32(&x7, arg1, (arg2[6]), (arg3[6]));
+  fiat_25519_cmovznz_u32(&x8, arg1, (arg2[7]), (arg3[7]));
+  fiat_25519_cmovznz_u32(&x9, arg1, (arg2[8]), (arg3[8]));
   fiat_25519_cmovznz_u32(&x10, arg1, (arg2[9]), (arg3[9]));
   out1[0] = x1;
   out1[1] = x2;
@@ -637,336 +976,582 @@
 
 /*
  * The function fiat_25519_to_bytes serializes a field element to bytes in little-endian order.
+ *
  * Postconditions:
  *   out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31]
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
  * Output Bounds:
  *   out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x7f]]
  */
-static void fiat_25519_to_bytes(uint8_t out1[32], const uint32_t arg1[10]) {
+static FIAT_25519_FIAT_INLINE void fiat_25519_to_bytes(uint8_t out1[32], const fiat_25519_tight_field_element arg1) {
   uint32_t x1;
   fiat_25519_uint1 x2;
-  fiat_25519_subborrowx_u26(&x1, &x2, 0x0, (arg1[0]), UINT32_C(0x3ffffed));
   uint32_t x3;
   fiat_25519_uint1 x4;
-  fiat_25519_subborrowx_u25(&x3, &x4, x2, (arg1[1]), UINT32_C(0x1ffffff));
   uint32_t x5;
   fiat_25519_uint1 x6;
-  fiat_25519_subborrowx_u26(&x5, &x6, x4, (arg1[2]), UINT32_C(0x3ffffff));
   uint32_t x7;
   fiat_25519_uint1 x8;
-  fiat_25519_subborrowx_u25(&x7, &x8, x6, (arg1[3]), UINT32_C(0x1ffffff));
   uint32_t x9;
   fiat_25519_uint1 x10;
-  fiat_25519_subborrowx_u26(&x9, &x10, x8, (arg1[4]), UINT32_C(0x3ffffff));
   uint32_t x11;
   fiat_25519_uint1 x12;
-  fiat_25519_subborrowx_u25(&x11, &x12, x10, (arg1[5]), UINT32_C(0x1ffffff));
   uint32_t x13;
   fiat_25519_uint1 x14;
-  fiat_25519_subborrowx_u26(&x13, &x14, x12, (arg1[6]), UINT32_C(0x3ffffff));
   uint32_t x15;
   fiat_25519_uint1 x16;
-  fiat_25519_subborrowx_u25(&x15, &x16, x14, (arg1[7]), UINT32_C(0x1ffffff));
   uint32_t x17;
   fiat_25519_uint1 x18;
-  fiat_25519_subborrowx_u26(&x17, &x18, x16, (arg1[8]), UINT32_C(0x3ffffff));
   uint32_t x19;
   fiat_25519_uint1 x20;
-  fiat_25519_subborrowx_u25(&x19, &x20, x18, (arg1[9]), UINT32_C(0x1ffffff));
   uint32_t x21;
-  fiat_25519_cmovznz_u32(&x21, x20, 0x0, UINT32_C(0xffffffff));
   uint32_t x22;
   fiat_25519_uint1 x23;
-  fiat_25519_addcarryx_u26(&x22, &x23, 0x0, x1, (x21 & UINT32_C(0x3ffffed)));
   uint32_t x24;
   fiat_25519_uint1 x25;
-  fiat_25519_addcarryx_u25(&x24, &x25, x23, x3, (x21 & UINT32_C(0x1ffffff)));
   uint32_t x26;
   fiat_25519_uint1 x27;
-  fiat_25519_addcarryx_u26(&x26, &x27, x25, x5, (x21 & UINT32_C(0x3ffffff)));
   uint32_t x28;
   fiat_25519_uint1 x29;
-  fiat_25519_addcarryx_u25(&x28, &x29, x27, x7, (x21 & UINT32_C(0x1ffffff)));
   uint32_t x30;
   fiat_25519_uint1 x31;
-  fiat_25519_addcarryx_u26(&x30, &x31, x29, x9, (x21 & UINT32_C(0x3ffffff)));
   uint32_t x32;
   fiat_25519_uint1 x33;
-  fiat_25519_addcarryx_u25(&x32, &x33, x31, x11, (x21 & UINT32_C(0x1ffffff)));
   uint32_t x34;
   fiat_25519_uint1 x35;
-  fiat_25519_addcarryx_u26(&x34, &x35, x33, x13, (x21 & UINT32_C(0x3ffffff)));
   uint32_t x36;
   fiat_25519_uint1 x37;
-  fiat_25519_addcarryx_u25(&x36, &x37, x35, x15, (x21 & UINT32_C(0x1ffffff)));
   uint32_t x38;
   fiat_25519_uint1 x39;
-  fiat_25519_addcarryx_u26(&x38, &x39, x37, x17, (x21 & UINT32_C(0x3ffffff)));
   uint32_t x40;
   fiat_25519_uint1 x41;
+  uint32_t x42;
+  uint32_t x43;
+  uint32_t x44;
+  uint32_t x45;
+  uint32_t x46;
+  uint32_t x47;
+  uint32_t x48;
+  uint32_t x49;
+  uint8_t x50;
+  uint32_t x51;
+  uint8_t x52;
+  uint32_t x53;
+  uint8_t x54;
+  uint8_t x55;
+  uint32_t x56;
+  uint8_t x57;
+  uint32_t x58;
+  uint8_t x59;
+  uint32_t x60;
+  uint8_t x61;
+  uint8_t x62;
+  uint32_t x63;
+  uint8_t x64;
+  uint32_t x65;
+  uint8_t x66;
+  uint32_t x67;
+  uint8_t x68;
+  uint8_t x69;
+  uint32_t x70;
+  uint8_t x71;
+  uint32_t x72;
+  uint8_t x73;
+  uint32_t x74;
+  uint8_t x75;
+  uint8_t x76;
+  uint32_t x77;
+  uint8_t x78;
+  uint32_t x79;
+  uint8_t x80;
+  uint32_t x81;
+  uint8_t x82;
+  uint8_t x83;
+  uint8_t x84;
+  uint32_t x85;
+  uint8_t x86;
+  uint32_t x87;
+  uint8_t x88;
+  fiat_25519_uint1 x89;
+  uint32_t x90;
+  uint8_t x91;
+  uint32_t x92;
+  uint8_t x93;
+  uint32_t x94;
+  uint8_t x95;
+  uint8_t x96;
+  uint32_t x97;
+  uint8_t x98;
+  uint32_t x99;
+  uint8_t x100;
+  uint32_t x101;
+  uint8_t x102;
+  uint8_t x103;
+  uint32_t x104;
+  uint8_t x105;
+  uint32_t x106;
+  uint8_t x107;
+  uint32_t x108;
+  uint8_t x109;
+  uint8_t x110;
+  uint32_t x111;
+  uint8_t x112;
+  uint32_t x113;
+  uint8_t x114;
+  uint32_t x115;
+  uint8_t x116;
+  uint8_t x117;
+  fiat_25519_subborrowx_u26(&x1, &x2, 0x0, (arg1[0]), UINT32_C(0x3ffffed));
+  fiat_25519_subborrowx_u25(&x3, &x4, x2, (arg1[1]), UINT32_C(0x1ffffff));
+  fiat_25519_subborrowx_u26(&x5, &x6, x4, (arg1[2]), UINT32_C(0x3ffffff));
+  fiat_25519_subborrowx_u25(&x7, &x8, x6, (arg1[3]), UINT32_C(0x1ffffff));
+  fiat_25519_subborrowx_u26(&x9, &x10, x8, (arg1[4]), UINT32_C(0x3ffffff));
+  fiat_25519_subborrowx_u25(&x11, &x12, x10, (arg1[5]), UINT32_C(0x1ffffff));
+  fiat_25519_subborrowx_u26(&x13, &x14, x12, (arg1[6]), UINT32_C(0x3ffffff));
+  fiat_25519_subborrowx_u25(&x15, &x16, x14, (arg1[7]), UINT32_C(0x1ffffff));
+  fiat_25519_subborrowx_u26(&x17, &x18, x16, (arg1[8]), UINT32_C(0x3ffffff));
+  fiat_25519_subborrowx_u25(&x19, &x20, x18, (arg1[9]), UINT32_C(0x1ffffff));
+  fiat_25519_cmovznz_u32(&x21, x20, 0x0, UINT32_C(0xffffffff));
+  fiat_25519_addcarryx_u26(&x22, &x23, 0x0, x1, (x21 & UINT32_C(0x3ffffed)));
+  fiat_25519_addcarryx_u25(&x24, &x25, x23, x3, (x21 & UINT32_C(0x1ffffff)));
+  fiat_25519_addcarryx_u26(&x26, &x27, x25, x5, (x21 & UINT32_C(0x3ffffff)));
+  fiat_25519_addcarryx_u25(&x28, &x29, x27, x7, (x21 & UINT32_C(0x1ffffff)));
+  fiat_25519_addcarryx_u26(&x30, &x31, x29, x9, (x21 & UINT32_C(0x3ffffff)));
+  fiat_25519_addcarryx_u25(&x32, &x33, x31, x11, (x21 & UINT32_C(0x1ffffff)));
+  fiat_25519_addcarryx_u26(&x34, &x35, x33, x13, (x21 & UINT32_C(0x3ffffff)));
+  fiat_25519_addcarryx_u25(&x36, &x37, x35, x15, (x21 & UINT32_C(0x1ffffff)));
+  fiat_25519_addcarryx_u26(&x38, &x39, x37, x17, (x21 & UINT32_C(0x3ffffff)));
   fiat_25519_addcarryx_u25(&x40, &x41, x39, x19, (x21 & UINT32_C(0x1ffffff)));
-  uint32_t x42 = (x40 << 6);
-  uint32_t x43 = (x38 << 4);
-  uint32_t x44 = (x36 << 3);
-  uint32_t x45 = (x34 * (uint32_t)0x2);
-  uint32_t x46 = (x30 << 6);
-  uint32_t x47 = (x28 << 5);
-  uint32_t x48 = (x26 << 3);
-  uint32_t x49 = (x24 << 2);
-  uint32_t x50 = (x22 >> 8);
-  uint8_t x51 = (uint8_t)(x22 & UINT8_C(0xff));
-  uint32_t x52 = (x50 >> 8);
-  uint8_t x53 = (uint8_t)(x50 & UINT8_C(0xff));
-  uint8_t x54 = (uint8_t)(x52 >> 8);
-  uint8_t x55 = (uint8_t)(x52 & UINT8_C(0xff));
-  uint32_t x56 = (x54 + x49);
-  uint32_t x57 = (x56 >> 8);
-  uint8_t x58 = (uint8_t)(x56 & UINT8_C(0xff));
-  uint32_t x59 = (x57 >> 8);
-  uint8_t x60 = (uint8_t)(x57 & UINT8_C(0xff));
-  uint8_t x61 = (uint8_t)(x59 >> 8);
-  uint8_t x62 = (uint8_t)(x59 & UINT8_C(0xff));
-  uint32_t x63 = (x61 + x48);
-  uint32_t x64 = (x63 >> 8);
-  uint8_t x65 = (uint8_t)(x63 & UINT8_C(0xff));
-  uint32_t x66 = (x64 >> 8);
-  uint8_t x67 = (uint8_t)(x64 & UINT8_C(0xff));
-  uint8_t x68 = (uint8_t)(x66 >> 8);
-  uint8_t x69 = (uint8_t)(x66 & UINT8_C(0xff));
-  uint32_t x70 = (x68 + x47);
-  uint32_t x71 = (x70 >> 8);
-  uint8_t x72 = (uint8_t)(x70 & UINT8_C(0xff));
-  uint32_t x73 = (x71 >> 8);
-  uint8_t x74 = (uint8_t)(x71 & UINT8_C(0xff));
-  uint8_t x75 = (uint8_t)(x73 >> 8);
-  uint8_t x76 = (uint8_t)(x73 & UINT8_C(0xff));
-  uint32_t x77 = (x75 + x46);
-  uint32_t x78 = (x77 >> 8);
-  uint8_t x79 = (uint8_t)(x77 & UINT8_C(0xff));
-  uint32_t x80 = (x78 >> 8);
-  uint8_t x81 = (uint8_t)(x78 & UINT8_C(0xff));
-  uint8_t x82 = (uint8_t)(x80 >> 8);
-  uint8_t x83 = (uint8_t)(x80 & UINT8_C(0xff));
-  uint8_t x84 = (uint8_t)(x82 & UINT8_C(0xff));
-  uint32_t x85 = (x32 >> 8);
-  uint8_t x86 = (uint8_t)(x32 & UINT8_C(0xff));
-  uint32_t x87 = (x85 >> 8);
-  uint8_t x88 = (uint8_t)(x85 & UINT8_C(0xff));
-  fiat_25519_uint1 x89 = (fiat_25519_uint1)(x87 >> 8);
-  uint8_t x90 = (uint8_t)(x87 & UINT8_C(0xff));
-  uint32_t x91 = (x89 + x45);
-  uint32_t x92 = (x91 >> 8);
-  uint8_t x93 = (uint8_t)(x91 & UINT8_C(0xff));
-  uint32_t x94 = (x92 >> 8);
-  uint8_t x95 = (uint8_t)(x92 & UINT8_C(0xff));
-  uint8_t x96 = (uint8_t)(x94 >> 8);
-  uint8_t x97 = (uint8_t)(x94 & UINT8_C(0xff));
-  uint32_t x98 = (x96 + x44);
-  uint32_t x99 = (x98 >> 8);
-  uint8_t x100 = (uint8_t)(x98 & UINT8_C(0xff));
-  uint32_t x101 = (x99 >> 8);
-  uint8_t x102 = (uint8_t)(x99 & UINT8_C(0xff));
-  uint8_t x103 = (uint8_t)(x101 >> 8);
-  uint8_t x104 = (uint8_t)(x101 & UINT8_C(0xff));
-  uint32_t x105 = (x103 + x43);
-  uint32_t x106 = (x105 >> 8);
-  uint8_t x107 = (uint8_t)(x105 & UINT8_C(0xff));
-  uint32_t x108 = (x106 >> 8);
-  uint8_t x109 = (uint8_t)(x106 & UINT8_C(0xff));
-  uint8_t x110 = (uint8_t)(x108 >> 8);
-  uint8_t x111 = (uint8_t)(x108 & UINT8_C(0xff));
-  uint32_t x112 = (x110 + x42);
-  uint32_t x113 = (x112 >> 8);
-  uint8_t x114 = (uint8_t)(x112 & UINT8_C(0xff));
-  uint32_t x115 = (x113 >> 8);
-  uint8_t x116 = (uint8_t)(x113 & UINT8_C(0xff));
-  uint8_t x117 = (uint8_t)(x115 >> 8);
-  uint8_t x118 = (uint8_t)(x115 & UINT8_C(0xff));
-  out1[0] = x51;
-  out1[1] = x53;
-  out1[2] = x55;
-  out1[3] = x58;
-  out1[4] = x60;
-  out1[5] = x62;
-  out1[6] = x65;
-  out1[7] = x67;
-  out1[8] = x69;
-  out1[9] = x72;
-  out1[10] = x74;
-  out1[11] = x76;
-  out1[12] = x79;
-  out1[13] = x81;
-  out1[14] = x83;
-  out1[15] = x84;
-  out1[16] = x86;
-  out1[17] = x88;
-  out1[18] = x90;
-  out1[19] = x93;
-  out1[20] = x95;
-  out1[21] = x97;
-  out1[22] = x100;
-  out1[23] = x102;
-  out1[24] = x104;
-  out1[25] = x107;
-  out1[26] = x109;
-  out1[27] = x111;
-  out1[28] = x114;
-  out1[29] = x116;
-  out1[30] = x118;
+  x42 = (x40 << 6);
+  x43 = (x38 << 4);
+  x44 = (x36 << 3);
+  x45 = (x34 * (uint32_t)0x2);
+  x46 = (x30 << 6);
+  x47 = (x28 << 5);
+  x48 = (x26 << 3);
+  x49 = (x24 << 2);
+  x50 = (uint8_t)(x22 & UINT8_C(0xff));
+  x51 = (x22 >> 8);
+  x52 = (uint8_t)(x51 & UINT8_C(0xff));
+  x53 = (x51 >> 8);
+  x54 = (uint8_t)(x53 & UINT8_C(0xff));
+  x55 = (uint8_t)(x53 >> 8);
+  x56 = (x49 + (uint32_t)x55);
+  x57 = (uint8_t)(x56 & UINT8_C(0xff));
+  x58 = (x56 >> 8);
+  x59 = (uint8_t)(x58 & UINT8_C(0xff));
+  x60 = (x58 >> 8);
+  x61 = (uint8_t)(x60 & UINT8_C(0xff));
+  x62 = (uint8_t)(x60 >> 8);
+  x63 = (x48 + (uint32_t)x62);
+  x64 = (uint8_t)(x63 & UINT8_C(0xff));
+  x65 = (x63 >> 8);
+  x66 = (uint8_t)(x65 & UINT8_C(0xff));
+  x67 = (x65 >> 8);
+  x68 = (uint8_t)(x67 & UINT8_C(0xff));
+  x69 = (uint8_t)(x67 >> 8);
+  x70 = (x47 + (uint32_t)x69);
+  x71 = (uint8_t)(x70 & UINT8_C(0xff));
+  x72 = (x70 >> 8);
+  x73 = (uint8_t)(x72 & UINT8_C(0xff));
+  x74 = (x72 >> 8);
+  x75 = (uint8_t)(x74 & UINT8_C(0xff));
+  x76 = (uint8_t)(x74 >> 8);
+  x77 = (x46 + (uint32_t)x76);
+  x78 = (uint8_t)(x77 & UINT8_C(0xff));
+  x79 = (x77 >> 8);
+  x80 = (uint8_t)(x79 & UINT8_C(0xff));
+  x81 = (x79 >> 8);
+  x82 = (uint8_t)(x81 & UINT8_C(0xff));
+  x83 = (uint8_t)(x81 >> 8);
+  x84 = (uint8_t)(x32 & UINT8_C(0xff));
+  x85 = (x32 >> 8);
+  x86 = (uint8_t)(x85 & UINT8_C(0xff));
+  x87 = (x85 >> 8);
+  x88 = (uint8_t)(x87 & UINT8_C(0xff));
+  x89 = (fiat_25519_uint1)(x87 >> 8);
+  x90 = (x45 + (uint32_t)x89);
+  x91 = (uint8_t)(x90 & UINT8_C(0xff));
+  x92 = (x90 >> 8);
+  x93 = (uint8_t)(x92 & UINT8_C(0xff));
+  x94 = (x92 >> 8);
+  x95 = (uint8_t)(x94 & UINT8_C(0xff));
+  x96 = (uint8_t)(x94 >> 8);
+  x97 = (x44 + (uint32_t)x96);
+  x98 = (uint8_t)(x97 & UINT8_C(0xff));
+  x99 = (x97 >> 8);
+  x100 = (uint8_t)(x99 & UINT8_C(0xff));
+  x101 = (x99 >> 8);
+  x102 = (uint8_t)(x101 & UINT8_C(0xff));
+  x103 = (uint8_t)(x101 >> 8);
+  x104 = (x43 + (uint32_t)x103);
+  x105 = (uint8_t)(x104 & UINT8_C(0xff));
+  x106 = (x104 >> 8);
+  x107 = (uint8_t)(x106 & UINT8_C(0xff));
+  x108 = (x106 >> 8);
+  x109 = (uint8_t)(x108 & UINT8_C(0xff));
+  x110 = (uint8_t)(x108 >> 8);
+  x111 = (x42 + (uint32_t)x110);
+  x112 = (uint8_t)(x111 & UINT8_C(0xff));
+  x113 = (x111 >> 8);
+  x114 = (uint8_t)(x113 & UINT8_C(0xff));
+  x115 = (x113 >> 8);
+  x116 = (uint8_t)(x115 & UINT8_C(0xff));
+  x117 = (uint8_t)(x115 >> 8);
+  out1[0] = x50;
+  out1[1] = x52;
+  out1[2] = x54;
+  out1[3] = x57;
+  out1[4] = x59;
+  out1[5] = x61;
+  out1[6] = x64;
+  out1[7] = x66;
+  out1[8] = x68;
+  out1[9] = x71;
+  out1[10] = x73;
+  out1[11] = x75;
+  out1[12] = x78;
+  out1[13] = x80;
+  out1[14] = x82;
+  out1[15] = x83;
+  out1[16] = x84;
+  out1[17] = x86;
+  out1[18] = x88;
+  out1[19] = x91;
+  out1[20] = x93;
+  out1[21] = x95;
+  out1[22] = x98;
+  out1[23] = x100;
+  out1[24] = x102;
+  out1[25] = x105;
+  out1[26] = x107;
+  out1[27] = x109;
+  out1[28] = x112;
+  out1[29] = x114;
+  out1[30] = x116;
   out1[31] = x117;
 }
 
 /*
  * The function fiat_25519_from_bytes deserializes a field element from bytes in little-endian order.
+ *
  * Postconditions:
  *   eval out1 mod m = bytes_eval arg1 mod m
  *
  * Input Bounds:
  *   arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x7f]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
  */
-static void fiat_25519_from_bytes(uint32_t out1[10], const uint8_t arg1[32]) {
-  uint32_t x1 = ((uint32_t)(arg1[31]) << 18);
-  uint32_t x2 = ((uint32_t)(arg1[30]) << 10);
-  uint32_t x3 = ((uint32_t)(arg1[29]) << 2);
-  uint32_t x4 = ((uint32_t)(arg1[28]) << 20);
-  uint32_t x5 = ((uint32_t)(arg1[27]) << 12);
-  uint32_t x6 = ((uint32_t)(arg1[26]) << 4);
-  uint32_t x7 = ((uint32_t)(arg1[25]) << 21);
-  uint32_t x8 = ((uint32_t)(arg1[24]) << 13);
-  uint32_t x9 = ((uint32_t)(arg1[23]) << 5);
-  uint32_t x10 = ((uint32_t)(arg1[22]) << 23);
-  uint32_t x11 = ((uint32_t)(arg1[21]) << 15);
-  uint32_t x12 = ((uint32_t)(arg1[20]) << 7);
-  uint32_t x13 = ((uint32_t)(arg1[19]) << 24);
-  uint32_t x14 = ((uint32_t)(arg1[18]) << 16);
-  uint32_t x15 = ((uint32_t)(arg1[17]) << 8);
-  uint8_t x16 = (arg1[16]);
-  uint32_t x17 = ((uint32_t)(arg1[15]) << 18);
-  uint32_t x18 = ((uint32_t)(arg1[14]) << 10);
-  uint32_t x19 = ((uint32_t)(arg1[13]) << 2);
-  uint32_t x20 = ((uint32_t)(arg1[12]) << 19);
-  uint32_t x21 = ((uint32_t)(arg1[11]) << 11);
-  uint32_t x22 = ((uint32_t)(arg1[10]) << 3);
-  uint32_t x23 = ((uint32_t)(arg1[9]) << 21);
-  uint32_t x24 = ((uint32_t)(arg1[8]) << 13);
-  uint32_t x25 = ((uint32_t)(arg1[7]) << 5);
-  uint32_t x26 = ((uint32_t)(arg1[6]) << 22);
-  uint32_t x27 = ((uint32_t)(arg1[5]) << 14);
-  uint32_t x28 = ((uint32_t)(arg1[4]) << 6);
-  uint32_t x29 = ((uint32_t)(arg1[3]) << 24);
-  uint32_t x30 = ((uint32_t)(arg1[2]) << 16);
-  uint32_t x31 = ((uint32_t)(arg1[1]) << 8);
-  uint8_t x32 = (arg1[0]);
-  uint32_t x33 = (x32 + (x31 + (x30 + x29)));
-  uint8_t x34 = (uint8_t)(x33 >> 26);
-  uint32_t x35 = (x33 & UINT32_C(0x3ffffff));
-  uint32_t x36 = (x3 + (x2 + x1));
-  uint32_t x37 = (x6 + (x5 + x4));
-  uint32_t x38 = (x9 + (x8 + x7));
-  uint32_t x39 = (x12 + (x11 + x10));
-  uint32_t x40 = (x16 + (x15 + (x14 + x13)));
-  uint32_t x41 = (x19 + (x18 + x17));
-  uint32_t x42 = (x22 + (x21 + x20));
-  uint32_t x43 = (x25 + (x24 + x23));
-  uint32_t x44 = (x28 + (x27 + x26));
-  uint32_t x45 = (x34 + x44);
-  uint8_t x46 = (uint8_t)(x45 >> 25);
-  uint32_t x47 = (x45 & UINT32_C(0x1ffffff));
-  uint32_t x48 = (x46 + x43);
-  uint8_t x49 = (uint8_t)(x48 >> 26);
-  uint32_t x50 = (x48 & UINT32_C(0x3ffffff));
-  uint32_t x51 = (x49 + x42);
-  uint8_t x52 = (uint8_t)(x51 >> 25);
-  uint32_t x53 = (x51 & UINT32_C(0x1ffffff));
-  uint32_t x54 = (x52 + x41);
-  uint32_t x55 = (x54 & UINT32_C(0x3ffffff));
-  uint8_t x56 = (uint8_t)(x40 >> 25);
-  uint32_t x57 = (x40 & UINT32_C(0x1ffffff));
-  uint32_t x58 = (x56 + x39);
-  uint8_t x59 = (uint8_t)(x58 >> 26);
-  uint32_t x60 = (x58 & UINT32_C(0x3ffffff));
-  uint32_t x61 = (x59 + x38);
-  uint8_t x62 = (uint8_t)(x61 >> 25);
-  uint32_t x63 = (x61 & UINT32_C(0x1ffffff));
-  uint32_t x64 = (x62 + x37);
-  uint8_t x65 = (uint8_t)(x64 >> 26);
-  uint32_t x66 = (x64 & UINT32_C(0x3ffffff));
-  uint32_t x67 = (x65 + x36);
-  out1[0] = x35;
-  out1[1] = x47;
-  out1[2] = x50;
-  out1[3] = x53;
+static FIAT_25519_FIAT_INLINE void fiat_25519_from_bytes(fiat_25519_tight_field_element out1, const uint8_t arg1[32]) {
+  uint32_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  uint32_t x4;
+  uint32_t x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint32_t x8;
+  uint32_t x9;
+  uint32_t x10;
+  uint32_t x11;
+  uint32_t x12;
+  uint32_t x13;
+  uint32_t x14;
+  uint32_t x15;
+  uint8_t x16;
+  uint32_t x17;
+  uint32_t x18;
+  uint32_t x19;
+  uint32_t x20;
+  uint32_t x21;
+  uint32_t x22;
+  uint32_t x23;
+  uint32_t x24;
+  uint32_t x25;
+  uint32_t x26;
+  uint32_t x27;
+  uint32_t x28;
+  uint32_t x29;
+  uint32_t x30;
+  uint32_t x31;
+  uint8_t x32;
+  uint32_t x33;
+  uint32_t x34;
+  uint32_t x35;
+  uint32_t x36;
+  uint8_t x37;
+  uint32_t x38;
+  uint32_t x39;
+  uint32_t x40;
+  uint32_t x41;
+  uint8_t x42;
+  uint32_t x43;
+  uint32_t x44;
+  uint32_t x45;
+  uint32_t x46;
+  uint8_t x47;
+  uint32_t x48;
+  uint32_t x49;
+  uint32_t x50;
+  uint32_t x51;
+  uint8_t x52;
+  uint32_t x53;
+  uint32_t x54;
+  uint32_t x55;
+  uint32_t x56;
+  uint32_t x57;
+  uint32_t x58;
+  uint32_t x59;
+  uint8_t x60;
+  uint32_t x61;
+  uint32_t x62;
+  uint32_t x63;
+  uint32_t x64;
+  uint8_t x65;
+  uint32_t x66;
+  uint32_t x67;
+  uint32_t x68;
+  uint32_t x69;
+  uint8_t x70;
+  uint32_t x71;
+  uint32_t x72;
+  uint32_t x73;
+  uint32_t x74;
+  uint8_t x75;
+  uint32_t x76;
+  uint32_t x77;
+  uint32_t x78;
+  x1 = ((uint32_t)(arg1[31]) << 18);
+  x2 = ((uint32_t)(arg1[30]) << 10);
+  x3 = ((uint32_t)(arg1[29]) << 2);
+  x4 = ((uint32_t)(arg1[28]) << 20);
+  x5 = ((uint32_t)(arg1[27]) << 12);
+  x6 = ((uint32_t)(arg1[26]) << 4);
+  x7 = ((uint32_t)(arg1[25]) << 21);
+  x8 = ((uint32_t)(arg1[24]) << 13);
+  x9 = ((uint32_t)(arg1[23]) << 5);
+  x10 = ((uint32_t)(arg1[22]) << 23);
+  x11 = ((uint32_t)(arg1[21]) << 15);
+  x12 = ((uint32_t)(arg1[20]) << 7);
+  x13 = ((uint32_t)(arg1[19]) << 24);
+  x14 = ((uint32_t)(arg1[18]) << 16);
+  x15 = ((uint32_t)(arg1[17]) << 8);
+  x16 = (arg1[16]);
+  x17 = ((uint32_t)(arg1[15]) << 18);
+  x18 = ((uint32_t)(arg1[14]) << 10);
+  x19 = ((uint32_t)(arg1[13]) << 2);
+  x20 = ((uint32_t)(arg1[12]) << 19);
+  x21 = ((uint32_t)(arg1[11]) << 11);
+  x22 = ((uint32_t)(arg1[10]) << 3);
+  x23 = ((uint32_t)(arg1[9]) << 21);
+  x24 = ((uint32_t)(arg1[8]) << 13);
+  x25 = ((uint32_t)(arg1[7]) << 5);
+  x26 = ((uint32_t)(arg1[6]) << 22);
+  x27 = ((uint32_t)(arg1[5]) << 14);
+  x28 = ((uint32_t)(arg1[4]) << 6);
+  x29 = ((uint32_t)(arg1[3]) << 24);
+  x30 = ((uint32_t)(arg1[2]) << 16);
+  x31 = ((uint32_t)(arg1[1]) << 8);
+  x32 = (arg1[0]);
+  x33 = (x31 + (uint32_t)x32);
+  x34 = (x30 + x33);
+  x35 = (x29 + x34);
+  x36 = (x35 & UINT32_C(0x3ffffff));
+  x37 = (uint8_t)(x35 >> 26);
+  x38 = (x28 + (uint32_t)x37);
+  x39 = (x27 + x38);
+  x40 = (x26 + x39);
+  x41 = (x40 & UINT32_C(0x1ffffff));
+  x42 = (uint8_t)(x40 >> 25);
+  x43 = (x25 + (uint32_t)x42);
+  x44 = (x24 + x43);
+  x45 = (x23 + x44);
+  x46 = (x45 & UINT32_C(0x3ffffff));
+  x47 = (uint8_t)(x45 >> 26);
+  x48 = (x22 + (uint32_t)x47);
+  x49 = (x21 + x48);
+  x50 = (x20 + x49);
+  x51 = (x50 & UINT32_C(0x1ffffff));
+  x52 = (uint8_t)(x50 >> 25);
+  x53 = (x19 + (uint32_t)x52);
+  x54 = (x18 + x53);
+  x55 = (x17 + x54);
+  x56 = (x15 + (uint32_t)x16);
+  x57 = (x14 + x56);
+  x58 = (x13 + x57);
+  x59 = (x58 & UINT32_C(0x1ffffff));
+  x60 = (uint8_t)(x58 >> 25);
+  x61 = (x12 + (uint32_t)x60);
+  x62 = (x11 + x61);
+  x63 = (x10 + x62);
+  x64 = (x63 & UINT32_C(0x3ffffff));
+  x65 = (uint8_t)(x63 >> 26);
+  x66 = (x9 + (uint32_t)x65);
+  x67 = (x8 + x66);
+  x68 = (x7 + x67);
+  x69 = (x68 & UINT32_C(0x1ffffff));
+  x70 = (uint8_t)(x68 >> 25);
+  x71 = (x6 + (uint32_t)x70);
+  x72 = (x5 + x71);
+  x73 = (x4 + x72);
+  x74 = (x73 & UINT32_C(0x3ffffff));
+  x75 = (uint8_t)(x73 >> 26);
+  x76 = (x3 + (uint32_t)x75);
+  x77 = (x2 + x76);
+  x78 = (x1 + x77);
+  out1[0] = x36;
+  out1[1] = x41;
+  out1[2] = x46;
+  out1[3] = x51;
   out1[4] = x55;
-  out1[5] = x57;
-  out1[6] = x60;
-  out1[7] = x63;
-  out1[8] = x66;
-  out1[9] = x67;
+  out1[5] = x59;
+  out1[6] = x64;
+  out1[7] = x69;
+  out1[8] = x74;
+  out1[9] = x78;
+}
+
+/*
+ * The function fiat_25519_relax is the identity function converting from tight field elements to loose field elements.
+ *
+ * Postconditions:
+ *   out1 = arg1
+ *
+ */
+static FIAT_25519_FIAT_INLINE void fiat_25519_relax(fiat_25519_loose_field_element out1, const fiat_25519_tight_field_element arg1) {
+  uint32_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  uint32_t x4;
+  uint32_t x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint32_t x8;
+  uint32_t x9;
+  uint32_t x10;
+  x1 = (arg1[0]);
+  x2 = (arg1[1]);
+  x3 = (arg1[2]);
+  x4 = (arg1[3]);
+  x5 = (arg1[4]);
+  x6 = (arg1[5]);
+  x7 = (arg1[6]);
+  x8 = (arg1[7]);
+  x9 = (arg1[8]);
+  x10 = (arg1[9]);
+  out1[0] = x1;
+  out1[1] = x2;
+  out1[2] = x3;
+  out1[3] = x4;
+  out1[4] = x5;
+  out1[5] = x6;
+  out1[6] = x7;
+  out1[7] = x8;
+  out1[8] = x9;
+  out1[9] = x10;
 }
 
 /*
  * The function fiat_25519_carry_scmul_121666 multiplies a field element by 121666 and reduces the result.
+ *
  * Postconditions:
  *   eval out1 mod m = (121666 * eval arg1) mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
  */
-static void fiat_25519_carry_scmul_121666(uint32_t out1[10], const uint32_t arg1[10]) {
-  uint64_t x1 = ((uint64_t)UINT32_C(0x1db42) * (arg1[9]));
-  uint64_t x2 = ((uint64_t)UINT32_C(0x1db42) * (arg1[8]));
-  uint64_t x3 = ((uint64_t)UINT32_C(0x1db42) * (arg1[7]));
-  uint64_t x4 = ((uint64_t)UINT32_C(0x1db42) * (arg1[6]));
-  uint64_t x5 = ((uint64_t)UINT32_C(0x1db42) * (arg1[5]));
-  uint64_t x6 = ((uint64_t)UINT32_C(0x1db42) * (arg1[4]));
-  uint64_t x7 = ((uint64_t)UINT32_C(0x1db42) * (arg1[3]));
-  uint64_t x8 = ((uint64_t)UINT32_C(0x1db42) * (arg1[2]));
-  uint64_t x9 = ((uint64_t)UINT32_C(0x1db42) * (arg1[1]));
-  uint64_t x10 = ((uint64_t)UINT32_C(0x1db42) * (arg1[0]));
-  uint32_t x11 = (uint32_t)(x10 >> 26);
-  uint32_t x12 = (uint32_t)(x10 & UINT32_C(0x3ffffff));
-  uint64_t x13 = (x11 + x9);
-  uint32_t x14 = (uint32_t)(x13 >> 25);
-  uint32_t x15 = (uint32_t)(x13 & UINT32_C(0x1ffffff));
-  uint64_t x16 = (x14 + x8);
-  uint32_t x17 = (uint32_t)(x16 >> 26);
-  uint32_t x18 = (uint32_t)(x16 & UINT32_C(0x3ffffff));
-  uint64_t x19 = (x17 + x7);
-  uint32_t x20 = (uint32_t)(x19 >> 25);
-  uint32_t x21 = (uint32_t)(x19 & UINT32_C(0x1ffffff));
-  uint64_t x22 = (x20 + x6);
-  uint32_t x23 = (uint32_t)(x22 >> 26);
-  uint32_t x24 = (uint32_t)(x22 & UINT32_C(0x3ffffff));
-  uint64_t x25 = (x23 + x5);
-  uint32_t x26 = (uint32_t)(x25 >> 25);
-  uint32_t x27 = (uint32_t)(x25 & UINT32_C(0x1ffffff));
-  uint64_t x28 = (x26 + x4);
-  uint32_t x29 = (uint32_t)(x28 >> 26);
-  uint32_t x30 = (uint32_t)(x28 & UINT32_C(0x3ffffff));
-  uint64_t x31 = (x29 + x3);
-  uint32_t x32 = (uint32_t)(x31 >> 25);
-  uint32_t x33 = (uint32_t)(x31 & UINT32_C(0x1ffffff));
-  uint64_t x34 = (x32 + x2);
-  uint32_t x35 = (uint32_t)(x34 >> 26);
-  uint32_t x36 = (uint32_t)(x34 & UINT32_C(0x3ffffff));
-  uint64_t x37 = (x35 + x1);
-  uint32_t x38 = (uint32_t)(x37 >> 25);
-  uint32_t x39 = (uint32_t)(x37 & UINT32_C(0x1ffffff));
-  uint32_t x40 = (x38 * UINT8_C(0x13));
-  uint32_t x41 = (x12 + x40);
-  fiat_25519_uint1 x42 = (fiat_25519_uint1)(x41 >> 26);
-  uint32_t x43 = (x41 & UINT32_C(0x3ffffff));
-  uint32_t x44 = (x42 + x15);
-  fiat_25519_uint1 x45 = (fiat_25519_uint1)(x44 >> 25);
-  uint32_t x46 = (x44 & UINT32_C(0x1ffffff));
-  uint32_t x47 = (x45 + x18);
+static FIAT_25519_FIAT_INLINE void fiat_25519_carry_scmul_121666(fiat_25519_tight_field_element out1, const fiat_25519_loose_field_element arg1) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
+  uint64_t x5;
+  uint64_t x6;
+  uint64_t x7;
+  uint64_t x8;
+  uint64_t x9;
+  uint64_t x10;
+  uint32_t x11;
+  uint32_t x12;
+  uint64_t x13;
+  uint32_t x14;
+  uint32_t x15;
+  uint64_t x16;
+  uint32_t x17;
+  uint32_t x18;
+  uint64_t x19;
+  uint32_t x20;
+  uint32_t x21;
+  uint64_t x22;
+  uint32_t x23;
+  uint32_t x24;
+  uint64_t x25;
+  uint32_t x26;
+  uint32_t x27;
+  uint64_t x28;
+  uint32_t x29;
+  uint32_t x30;
+  uint64_t x31;
+  uint32_t x32;
+  uint32_t x33;
+  uint64_t x34;
+  uint32_t x35;
+  uint32_t x36;
+  uint64_t x37;
+  uint32_t x38;
+  uint32_t x39;
+  uint32_t x40;
+  uint32_t x41;
+  fiat_25519_uint1 x42;
+  uint32_t x43;
+  uint32_t x44;
+  fiat_25519_uint1 x45;
+  uint32_t x46;
+  uint32_t x47;
+  x1 = ((uint64_t)UINT32_C(0x1db42) * (arg1[9]));
+  x2 = ((uint64_t)UINT32_C(0x1db42) * (arg1[8]));
+  x3 = ((uint64_t)UINT32_C(0x1db42) * (arg1[7]));
+  x4 = ((uint64_t)UINT32_C(0x1db42) * (arg1[6]));
+  x5 = ((uint64_t)UINT32_C(0x1db42) * (arg1[5]));
+  x6 = ((uint64_t)UINT32_C(0x1db42) * (arg1[4]));
+  x7 = ((uint64_t)UINT32_C(0x1db42) * (arg1[3]));
+  x8 = ((uint64_t)UINT32_C(0x1db42) * (arg1[2]));
+  x9 = ((uint64_t)UINT32_C(0x1db42) * (arg1[1]));
+  x10 = ((uint64_t)UINT32_C(0x1db42) * (arg1[0]));
+  x11 = (uint32_t)(x10 >> 26);
+  x12 = (uint32_t)(x10 & UINT32_C(0x3ffffff));
+  x13 = (x11 + x9);
+  x14 = (uint32_t)(x13 >> 25);
+  x15 = (uint32_t)(x13 & UINT32_C(0x1ffffff));
+  x16 = (x14 + x8);
+  x17 = (uint32_t)(x16 >> 26);
+  x18 = (uint32_t)(x16 & UINT32_C(0x3ffffff));
+  x19 = (x17 + x7);
+  x20 = (uint32_t)(x19 >> 25);
+  x21 = (uint32_t)(x19 & UINT32_C(0x1ffffff));
+  x22 = (x20 + x6);
+  x23 = (uint32_t)(x22 >> 26);
+  x24 = (uint32_t)(x22 & UINT32_C(0x3ffffff));
+  x25 = (x23 + x5);
+  x26 = (uint32_t)(x25 >> 25);
+  x27 = (uint32_t)(x25 & UINT32_C(0x1ffffff));
+  x28 = (x26 + x4);
+  x29 = (uint32_t)(x28 >> 26);
+  x30 = (uint32_t)(x28 & UINT32_C(0x3ffffff));
+  x31 = (x29 + x3);
+  x32 = (uint32_t)(x31 >> 25);
+  x33 = (uint32_t)(x31 & UINT32_C(0x1ffffff));
+  x34 = (x32 + x2);
+  x35 = (uint32_t)(x34 >> 26);
+  x36 = (uint32_t)(x34 & UINT32_C(0x3ffffff));
+  x37 = (x35 + x1);
+  x38 = (uint32_t)(x37 >> 25);
+  x39 = (uint32_t)(x37 & UINT32_C(0x1ffffff));
+  x40 = (x38 * UINT8_C(0x13));
+  x41 = (x12 + x40);
+  x42 = (fiat_25519_uint1)(x41 >> 26);
+  x43 = (x41 & UINT32_C(0x3ffffff));
+  x44 = (x42 + x15);
+  x45 = (fiat_25519_uint1)(x44 >> 25);
+  x46 = (x44 & UINT32_C(0x1ffffff));
+  x47 = (x45 + x18);
   out1[0] = x43;
   out1[1] = x46;
   out1[2] = x47;
@@ -978,4 +1563,3 @@
   out1[8] = x36;
   out1[9] = x39;
 }
-
diff --git a/src/third_party/fiat/curve25519_64.h b/src/third_party/fiat/curve25519_64.h
index 02679bb..faed049 100644
--- a/src/third_party/fiat/curve25519_64.h
+++ b/src/third_party/fiat/curve25519_64.h
@@ -1,26 +1,56 @@
-/* Autogenerated: src/ExtractionOCaml/unsaturated_solinas --static 25519 5 '2^255 - 19' 64 carry_mul carry_square carry add sub opp selectznz to_bytes from_bytes carry_scmul121666 */
+/* Autogenerated: 'src/ExtractionOCaml/unsaturated_solinas' --inline --static --use-value-barrier 25519 64 '(auto)' '2^255 - 19' carry_mul carry_square carry add sub opp selectznz to_bytes from_bytes relax carry_scmul121666 */
 /* curve description: 25519 */
-/* requested operations: carry_mul, carry_square, carry, add, sub, opp, selectznz, to_bytes, from_bytes, carry_scmul121666 */
-/* n = 5 (from "5") */
-/* s-c = 2^255 - [(1, 19)] (from "2^255 - 19") */
 /* machine_wordsize = 64 (from "64") */
-
+/* requested operations: carry_mul, carry_square, carry, add, sub, opp, selectznz, to_bytes, from_bytes, relax, carry_scmul121666 */
+/* n = 5 (from "(auto)") */
+/* s-c = 2^255 - [(1, 19)] (from "2^255 - 19") */
+/* tight_bounds_multiplier = 1 (from "") */
+/*  */
 /* Computed values: */
-/* carry_chain = [0, 1, 2, 3, 4, 0, 1] */
+/*   carry_chain = [0, 1, 2, 3, 4, 0, 1] */
+/*   eval z = z[0] + (z[1] << 51) + (z[2] << 102) + (z[3] << 153) + (z[4] << 204) */
+/*   bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) */
+/*   balance = [0xfffffffffffda, 0xffffffffffffe, 0xffffffffffffe, 0xffffffffffffe, 0xffffffffffffe] */
 
 #include <stdint.h>
 typedef unsigned char fiat_25519_uint1;
 typedef signed char fiat_25519_int1;
-typedef signed __int128 fiat_25519_int128;
-typedef unsigned __int128 fiat_25519_uint128;
+#if defined(__GNUC__) || defined(__clang__)
+#  define FIAT_25519_FIAT_EXTENSION __extension__
+#  define FIAT_25519_FIAT_INLINE __inline__
+#else
+#  define FIAT_25519_FIAT_EXTENSION
+#  define FIAT_25519_FIAT_INLINE
+#endif
+
+FIAT_25519_FIAT_EXTENSION typedef signed __int128 fiat_25519_int128;
+FIAT_25519_FIAT_EXTENSION typedef unsigned __int128 fiat_25519_uint128;
+
+/* The type fiat_25519_loose_field_element is a field element with loose bounds. */
+/* Bounds: [[0x0 ~> 0x18000000000000], [0x0 ~> 0x18000000000000], [0x0 ~> 0x18000000000000], [0x0 ~> 0x18000000000000], [0x0 ~> 0x18000000000000]] */
+typedef uint64_t fiat_25519_loose_field_element[5];
+
+/* The type fiat_25519_tight_field_element is a field element with tight bounds. */
+/* Bounds: [[0x0 ~> 0x8000000000000], [0x0 ~> 0x8000000000000], [0x0 ~> 0x8000000000000], [0x0 ~> 0x8000000000000], [0x0 ~> 0x8000000000000]] */
+typedef uint64_t fiat_25519_tight_field_element[5];
 
 #if (-1 & 3) != 3
 #error "This code only works on a two's complement system"
 #endif
 
+#if !defined(FIAT_25519_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
+static __inline__ uint64_t fiat_25519_value_barrier_u64(uint64_t a) {
+  __asm__("" : "+r"(a) : /* no inputs */);
+  return a;
+}
+#else
+#  define fiat_25519_value_barrier_u64(x) (x)
+#endif
+
 
 /*
  * The function fiat_25519_addcarryx_u51 is an addition with carry.
+ *
  * Postconditions:
  *   out1 = (arg1 + arg2 + arg3) mod 2^51
  *   out2 = ⌊(arg1 + arg2 + arg3) / 2^51⌋
@@ -33,16 +63,20 @@
  *   out1: [0x0 ~> 0x7ffffffffffff]
  *   out2: [0x0 ~> 0x1]
  */
-static void fiat_25519_addcarryx_u51(uint64_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
-  uint64_t x1 = ((arg1 + arg2) + arg3);
-  uint64_t x2 = (x1 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint1 x3 = (fiat_25519_uint1)(x1 >> 51);
+static FIAT_25519_FIAT_INLINE void fiat_25519_addcarryx_u51(uint64_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
+  uint64_t x1;
+  uint64_t x2;
+  fiat_25519_uint1 x3;
+  x1 = ((arg1 + arg2) + arg3);
+  x2 = (x1 & UINT64_C(0x7ffffffffffff));
+  x3 = (fiat_25519_uint1)(x1 >> 51);
   *out1 = x2;
   *out2 = x3;
 }
 
 /*
  * The function fiat_25519_subborrowx_u51 is a subtraction with borrow.
+ *
  * Postconditions:
  *   out1 = (-arg1 + arg2 + -arg3) mod 2^51
  *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^51⌋
@@ -55,16 +89,20 @@
  *   out1: [0x0 ~> 0x7ffffffffffff]
  *   out2: [0x0 ~> 0x1]
  */
-static void fiat_25519_subborrowx_u51(uint64_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
-  int64_t x1 = ((int64_t)(arg2 - (int64_t)arg1) - (int64_t)arg3);
-  fiat_25519_int1 x2 = (fiat_25519_int1)(x1 >> 51);
-  uint64_t x3 = (x1 & UINT64_C(0x7ffffffffffff));
+static FIAT_25519_FIAT_INLINE void fiat_25519_subborrowx_u51(uint64_t* out1, fiat_25519_uint1* out2, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
+  int64_t x1;
+  fiat_25519_int1 x2;
+  uint64_t x3;
+  x1 = ((int64_t)(arg2 - (int64_t)arg1) - (int64_t)arg3);
+  x2 = (fiat_25519_int1)(x1 >> 51);
+  x3 = (x1 & UINT64_C(0x7ffffffffffff));
   *out1 = x3;
   *out2 = (fiat_25519_uint1)(0x0 - x2);
 }
 
 /*
  * The function fiat_25519_cmovznz_u64 is a single-word conditional move.
+ *
  * Postconditions:
  *   out1 = (if arg1 = 0 then arg2 else arg3)
  *
@@ -75,83 +113,128 @@
  * Output Bounds:
  *   out1: [0x0 ~> 0xffffffffffffffff]
  */
-static void fiat_25519_cmovznz_u64(uint64_t* out1, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
-  fiat_25519_uint1 x1 = (!(!arg1));
-  uint64_t x2 = ((fiat_25519_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
-  // Note this line has been patched from the synthesized code to add value
-  // barriers.
-  //
-  // Clang recognizes this pattern as a select. While it usually transforms it
-  // to a cmov, it sometimes further transforms it into a branch, which we do
-  // not want.
-  uint64_t x3 = ((value_barrier_u64(x2) & arg3) | (value_barrier_u64(~x2) & arg2));
+static FIAT_25519_FIAT_INLINE void fiat_25519_cmovznz_u64(uint64_t* out1, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) {
+  fiat_25519_uint1 x1;
+  uint64_t x2;
+  uint64_t x3;
+  x1 = (!(!arg1));
+  x2 = ((fiat_25519_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
+  x3 = ((fiat_25519_value_barrier_u64(x2) & arg3) | (fiat_25519_value_barrier_u64((~x2)) & arg2));
   *out1 = x3;
 }
 
 /*
  * The function fiat_25519_carry_mul multiplies two field elements and reduces the result.
+ *
  * Postconditions:
  *   eval out1 mod m = (eval arg1 * eval arg2) mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664]]
- *   arg2: [[0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
  */
-static void fiat_25519_carry_mul(uint64_t out1[5], const uint64_t arg1[5], const uint64_t arg2[5]) {
-  fiat_25519_uint128 x1 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[4]) * UINT8_C(0x13)));
-  fiat_25519_uint128 x2 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[3]) * UINT8_C(0x13)));
-  fiat_25519_uint128 x3 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[2]) * UINT8_C(0x13)));
-  fiat_25519_uint128 x4 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[1]) * UINT8_C(0x13)));
-  fiat_25519_uint128 x5 = ((fiat_25519_uint128)(arg1[3]) * ((arg2[4]) * UINT8_C(0x13)));
-  fiat_25519_uint128 x6 = ((fiat_25519_uint128)(arg1[3]) * ((arg2[3]) * UINT8_C(0x13)));
-  fiat_25519_uint128 x7 = ((fiat_25519_uint128)(arg1[3]) * ((arg2[2]) * UINT8_C(0x13)));
-  fiat_25519_uint128 x8 = ((fiat_25519_uint128)(arg1[2]) * ((arg2[4]) * UINT8_C(0x13)));
-  fiat_25519_uint128 x9 = ((fiat_25519_uint128)(arg1[2]) * ((arg2[3]) * UINT8_C(0x13)));
-  fiat_25519_uint128 x10 = ((fiat_25519_uint128)(arg1[1]) * ((arg2[4]) * UINT8_C(0x13)));
-  fiat_25519_uint128 x11 = ((fiat_25519_uint128)(arg1[4]) * (arg2[0]));
-  fiat_25519_uint128 x12 = ((fiat_25519_uint128)(arg1[3]) * (arg2[1]));
-  fiat_25519_uint128 x13 = ((fiat_25519_uint128)(arg1[3]) * (arg2[0]));
-  fiat_25519_uint128 x14 = ((fiat_25519_uint128)(arg1[2]) * (arg2[2]));
-  fiat_25519_uint128 x15 = ((fiat_25519_uint128)(arg1[2]) * (arg2[1]));
-  fiat_25519_uint128 x16 = ((fiat_25519_uint128)(arg1[2]) * (arg2[0]));
-  fiat_25519_uint128 x17 = ((fiat_25519_uint128)(arg1[1]) * (arg2[3]));
-  fiat_25519_uint128 x18 = ((fiat_25519_uint128)(arg1[1]) * (arg2[2]));
-  fiat_25519_uint128 x19 = ((fiat_25519_uint128)(arg1[1]) * (arg2[1]));
-  fiat_25519_uint128 x20 = ((fiat_25519_uint128)(arg1[1]) * (arg2[0]));
-  fiat_25519_uint128 x21 = ((fiat_25519_uint128)(arg1[0]) * (arg2[4]));
-  fiat_25519_uint128 x22 = ((fiat_25519_uint128)(arg1[0]) * (arg2[3]));
-  fiat_25519_uint128 x23 = ((fiat_25519_uint128)(arg1[0]) * (arg2[2]));
-  fiat_25519_uint128 x24 = ((fiat_25519_uint128)(arg1[0]) * (arg2[1]));
-  fiat_25519_uint128 x25 = ((fiat_25519_uint128)(arg1[0]) * (arg2[0]));
-  fiat_25519_uint128 x26 = (x25 + (x10 + (x9 + (x7 + x4))));
-  uint64_t x27 = (uint64_t)(x26 >> 51);
-  uint64_t x28 = (uint64_t)(x26 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint128 x29 = (x21 + (x17 + (x14 + (x12 + x11))));
-  fiat_25519_uint128 x30 = (x22 + (x18 + (x15 + (x13 + x1))));
-  fiat_25519_uint128 x31 = (x23 + (x19 + (x16 + (x5 + x2))));
-  fiat_25519_uint128 x32 = (x24 + (x20 + (x8 + (x6 + x3))));
-  fiat_25519_uint128 x33 = (x27 + x32);
-  uint64_t x34 = (uint64_t)(x33 >> 51);
-  uint64_t x35 = (uint64_t)(x33 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint128 x36 = (x34 + x31);
-  uint64_t x37 = (uint64_t)(x36 >> 51);
-  uint64_t x38 = (uint64_t)(x36 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint128 x39 = (x37 + x30);
-  uint64_t x40 = (uint64_t)(x39 >> 51);
-  uint64_t x41 = (uint64_t)(x39 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint128 x42 = (x40 + x29);
-  uint64_t x43 = (uint64_t)(x42 >> 51);
-  uint64_t x44 = (uint64_t)(x42 & UINT64_C(0x7ffffffffffff));
-  uint64_t x45 = (x43 * UINT8_C(0x13));
-  uint64_t x46 = (x28 + x45);
-  uint64_t x47 = (x46 >> 51);
-  uint64_t x48 = (x46 & UINT64_C(0x7ffffffffffff));
-  uint64_t x49 = (x47 + x35);
-  fiat_25519_uint1 x50 = (fiat_25519_uint1)(x49 >> 51);
-  uint64_t x51 = (x49 & UINT64_C(0x7ffffffffffff));
-  uint64_t x52 = (x50 + x38);
+static FIAT_25519_FIAT_INLINE void fiat_25519_carry_mul(fiat_25519_tight_field_element out1, const fiat_25519_loose_field_element arg1, const fiat_25519_loose_field_element arg2) {
+  fiat_25519_uint128 x1;
+  fiat_25519_uint128 x2;
+  fiat_25519_uint128 x3;
+  fiat_25519_uint128 x4;
+  fiat_25519_uint128 x5;
+  fiat_25519_uint128 x6;
+  fiat_25519_uint128 x7;
+  fiat_25519_uint128 x8;
+  fiat_25519_uint128 x9;
+  fiat_25519_uint128 x10;
+  fiat_25519_uint128 x11;
+  fiat_25519_uint128 x12;
+  fiat_25519_uint128 x13;
+  fiat_25519_uint128 x14;
+  fiat_25519_uint128 x15;
+  fiat_25519_uint128 x16;
+  fiat_25519_uint128 x17;
+  fiat_25519_uint128 x18;
+  fiat_25519_uint128 x19;
+  fiat_25519_uint128 x20;
+  fiat_25519_uint128 x21;
+  fiat_25519_uint128 x22;
+  fiat_25519_uint128 x23;
+  fiat_25519_uint128 x24;
+  fiat_25519_uint128 x25;
+  fiat_25519_uint128 x26;
+  uint64_t x27;
+  uint64_t x28;
+  fiat_25519_uint128 x29;
+  fiat_25519_uint128 x30;
+  fiat_25519_uint128 x31;
+  fiat_25519_uint128 x32;
+  fiat_25519_uint128 x33;
+  uint64_t x34;
+  uint64_t x35;
+  fiat_25519_uint128 x36;
+  uint64_t x37;
+  uint64_t x38;
+  fiat_25519_uint128 x39;
+  uint64_t x40;
+  uint64_t x41;
+  fiat_25519_uint128 x42;
+  uint64_t x43;
+  uint64_t x44;
+  uint64_t x45;
+  uint64_t x46;
+  uint64_t x47;
+  uint64_t x48;
+  uint64_t x49;
+  fiat_25519_uint1 x50;
+  uint64_t x51;
+  uint64_t x52;
+  x1 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[4]) * UINT8_C(0x13)));
+  x2 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[3]) * UINT8_C(0x13)));
+  x3 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[2]) * UINT8_C(0x13)));
+  x4 = ((fiat_25519_uint128)(arg1[4]) * ((arg2[1]) * UINT8_C(0x13)));
+  x5 = ((fiat_25519_uint128)(arg1[3]) * ((arg2[4]) * UINT8_C(0x13)));
+  x6 = ((fiat_25519_uint128)(arg1[3]) * ((arg2[3]) * UINT8_C(0x13)));
+  x7 = ((fiat_25519_uint128)(arg1[3]) * ((arg2[2]) * UINT8_C(0x13)));
+  x8 = ((fiat_25519_uint128)(arg1[2]) * ((arg2[4]) * UINT8_C(0x13)));
+  x9 = ((fiat_25519_uint128)(arg1[2]) * ((arg2[3]) * UINT8_C(0x13)));
+  x10 = ((fiat_25519_uint128)(arg1[1]) * ((arg2[4]) * UINT8_C(0x13)));
+  x11 = ((fiat_25519_uint128)(arg1[4]) * (arg2[0]));
+  x12 = ((fiat_25519_uint128)(arg1[3]) * (arg2[1]));
+  x13 = ((fiat_25519_uint128)(arg1[3]) * (arg2[0]));
+  x14 = ((fiat_25519_uint128)(arg1[2]) * (arg2[2]));
+  x15 = ((fiat_25519_uint128)(arg1[2]) * (arg2[1]));
+  x16 = ((fiat_25519_uint128)(arg1[2]) * (arg2[0]));
+  x17 = ((fiat_25519_uint128)(arg1[1]) * (arg2[3]));
+  x18 = ((fiat_25519_uint128)(arg1[1]) * (arg2[2]));
+  x19 = ((fiat_25519_uint128)(arg1[1]) * (arg2[1]));
+  x20 = ((fiat_25519_uint128)(arg1[1]) * (arg2[0]));
+  x21 = ((fiat_25519_uint128)(arg1[0]) * (arg2[4]));
+  x22 = ((fiat_25519_uint128)(arg1[0]) * (arg2[3]));
+  x23 = ((fiat_25519_uint128)(arg1[0]) * (arg2[2]));
+  x24 = ((fiat_25519_uint128)(arg1[0]) * (arg2[1]));
+  x25 = ((fiat_25519_uint128)(arg1[0]) * (arg2[0]));
+  x26 = (x25 + (x10 + (x9 + (x7 + x4))));
+  x27 = (uint64_t)(x26 >> 51);
+  x28 = (uint64_t)(x26 & UINT64_C(0x7ffffffffffff));
+  x29 = (x21 + (x17 + (x14 + (x12 + x11))));
+  x30 = (x22 + (x18 + (x15 + (x13 + x1))));
+  x31 = (x23 + (x19 + (x16 + (x5 + x2))));
+  x32 = (x24 + (x20 + (x8 + (x6 + x3))));
+  x33 = (x27 + x32);
+  x34 = (uint64_t)(x33 >> 51);
+  x35 = (uint64_t)(x33 & UINT64_C(0x7ffffffffffff));
+  x36 = (x34 + x31);
+  x37 = (uint64_t)(x36 >> 51);
+  x38 = (uint64_t)(x36 & UINT64_C(0x7ffffffffffff));
+  x39 = (x37 + x30);
+  x40 = (uint64_t)(x39 >> 51);
+  x41 = (uint64_t)(x39 & UINT64_C(0x7ffffffffffff));
+  x42 = (x40 + x29);
+  x43 = (uint64_t)(x42 >> 51);
+  x44 = (uint64_t)(x42 & UINT64_C(0x7ffffffffffff));
+  x45 = (x43 * UINT8_C(0x13));
+  x46 = (x28 + x45);
+  x47 = (x46 >> 51);
+  x48 = (x46 & UINT64_C(0x7ffffffffffff));
+  x49 = (x47 + x35);
+  x50 = (fiat_25519_uint1)(x49 >> 51);
+  x51 = (x49 & UINT64_C(0x7ffffffffffff));
+  x52 = (x50 + x38);
   out1[0] = x48;
   out1[1] = x51;
   out1[2] = x52;
@@ -161,65 +244,112 @@
 
 /*
  * The function fiat_25519_carry_square squares a field element and reduces the result.
+ *
  * Postconditions:
  *   eval out1 mod m = (eval arg1 * eval arg1) mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
  */
-static void fiat_25519_carry_square(uint64_t out1[5], const uint64_t arg1[5]) {
-  uint64_t x1 = ((arg1[4]) * UINT8_C(0x13));
-  uint64_t x2 = (x1 * 0x2);
-  uint64_t x3 = ((arg1[4]) * 0x2);
-  uint64_t x4 = ((arg1[3]) * UINT8_C(0x13));
-  uint64_t x5 = (x4 * 0x2);
-  uint64_t x6 = ((arg1[3]) * 0x2);
-  uint64_t x7 = ((arg1[2]) * 0x2);
-  uint64_t x8 = ((arg1[1]) * 0x2);
-  fiat_25519_uint128 x9 = ((fiat_25519_uint128)(arg1[4]) * x1);
-  fiat_25519_uint128 x10 = ((fiat_25519_uint128)(arg1[3]) * x2);
-  fiat_25519_uint128 x11 = ((fiat_25519_uint128)(arg1[3]) * x4);
-  fiat_25519_uint128 x12 = ((fiat_25519_uint128)(arg1[2]) * x2);
-  fiat_25519_uint128 x13 = ((fiat_25519_uint128)(arg1[2]) * x5);
-  fiat_25519_uint128 x14 = ((fiat_25519_uint128)(arg1[2]) * (arg1[2]));
-  fiat_25519_uint128 x15 = ((fiat_25519_uint128)(arg1[1]) * x2);
-  fiat_25519_uint128 x16 = ((fiat_25519_uint128)(arg1[1]) * x6);
-  fiat_25519_uint128 x17 = ((fiat_25519_uint128)(arg1[1]) * x7);
-  fiat_25519_uint128 x18 = ((fiat_25519_uint128)(arg1[1]) * (arg1[1]));
-  fiat_25519_uint128 x19 = ((fiat_25519_uint128)(arg1[0]) * x3);
-  fiat_25519_uint128 x20 = ((fiat_25519_uint128)(arg1[0]) * x6);
-  fiat_25519_uint128 x21 = ((fiat_25519_uint128)(arg1[0]) * x7);
-  fiat_25519_uint128 x22 = ((fiat_25519_uint128)(arg1[0]) * x8);
-  fiat_25519_uint128 x23 = ((fiat_25519_uint128)(arg1[0]) * (arg1[0]));
-  fiat_25519_uint128 x24 = (x23 + (x15 + x13));
-  uint64_t x25 = (uint64_t)(x24 >> 51);
-  uint64_t x26 = (uint64_t)(x24 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint128 x27 = (x19 + (x16 + x14));
-  fiat_25519_uint128 x28 = (x20 + (x17 + x9));
-  fiat_25519_uint128 x29 = (x21 + (x18 + x10));
-  fiat_25519_uint128 x30 = (x22 + (x12 + x11));
-  fiat_25519_uint128 x31 = (x25 + x30);
-  uint64_t x32 = (uint64_t)(x31 >> 51);
-  uint64_t x33 = (uint64_t)(x31 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint128 x34 = (x32 + x29);
-  uint64_t x35 = (uint64_t)(x34 >> 51);
-  uint64_t x36 = (uint64_t)(x34 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint128 x37 = (x35 + x28);
-  uint64_t x38 = (uint64_t)(x37 >> 51);
-  uint64_t x39 = (uint64_t)(x37 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint128 x40 = (x38 + x27);
-  uint64_t x41 = (uint64_t)(x40 >> 51);
-  uint64_t x42 = (uint64_t)(x40 & UINT64_C(0x7ffffffffffff));
-  uint64_t x43 = (x41 * UINT8_C(0x13));
-  uint64_t x44 = (x26 + x43);
-  uint64_t x45 = (x44 >> 51);
-  uint64_t x46 = (x44 & UINT64_C(0x7ffffffffffff));
-  uint64_t x47 = (x45 + x33);
-  fiat_25519_uint1 x48 = (fiat_25519_uint1)(x47 >> 51);
-  uint64_t x49 = (x47 & UINT64_C(0x7ffffffffffff));
-  uint64_t x50 = (x48 + x36);
+static FIAT_25519_FIAT_INLINE void fiat_25519_carry_square(fiat_25519_tight_field_element out1, const fiat_25519_loose_field_element arg1) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
+  uint64_t x5;
+  uint64_t x6;
+  uint64_t x7;
+  uint64_t x8;
+  fiat_25519_uint128 x9;
+  fiat_25519_uint128 x10;
+  fiat_25519_uint128 x11;
+  fiat_25519_uint128 x12;
+  fiat_25519_uint128 x13;
+  fiat_25519_uint128 x14;
+  fiat_25519_uint128 x15;
+  fiat_25519_uint128 x16;
+  fiat_25519_uint128 x17;
+  fiat_25519_uint128 x18;
+  fiat_25519_uint128 x19;
+  fiat_25519_uint128 x20;
+  fiat_25519_uint128 x21;
+  fiat_25519_uint128 x22;
+  fiat_25519_uint128 x23;
+  fiat_25519_uint128 x24;
+  uint64_t x25;
+  uint64_t x26;
+  fiat_25519_uint128 x27;
+  fiat_25519_uint128 x28;
+  fiat_25519_uint128 x29;
+  fiat_25519_uint128 x30;
+  fiat_25519_uint128 x31;
+  uint64_t x32;
+  uint64_t x33;
+  fiat_25519_uint128 x34;
+  uint64_t x35;
+  uint64_t x36;
+  fiat_25519_uint128 x37;
+  uint64_t x38;
+  uint64_t x39;
+  fiat_25519_uint128 x40;
+  uint64_t x41;
+  uint64_t x42;
+  uint64_t x43;
+  uint64_t x44;
+  uint64_t x45;
+  uint64_t x46;
+  uint64_t x47;
+  fiat_25519_uint1 x48;
+  uint64_t x49;
+  uint64_t x50;
+  x1 = ((arg1[4]) * UINT8_C(0x13));
+  x2 = (x1 * 0x2);
+  x3 = ((arg1[4]) * 0x2);
+  x4 = ((arg1[3]) * UINT8_C(0x13));
+  x5 = (x4 * 0x2);
+  x6 = ((arg1[3]) * 0x2);
+  x7 = ((arg1[2]) * 0x2);
+  x8 = ((arg1[1]) * 0x2);
+  x9 = ((fiat_25519_uint128)(arg1[4]) * x1);
+  x10 = ((fiat_25519_uint128)(arg1[3]) * x2);
+  x11 = ((fiat_25519_uint128)(arg1[3]) * x4);
+  x12 = ((fiat_25519_uint128)(arg1[2]) * x2);
+  x13 = ((fiat_25519_uint128)(arg1[2]) * x5);
+  x14 = ((fiat_25519_uint128)(arg1[2]) * (arg1[2]));
+  x15 = ((fiat_25519_uint128)(arg1[1]) * x2);
+  x16 = ((fiat_25519_uint128)(arg1[1]) * x6);
+  x17 = ((fiat_25519_uint128)(arg1[1]) * x7);
+  x18 = ((fiat_25519_uint128)(arg1[1]) * (arg1[1]));
+  x19 = ((fiat_25519_uint128)(arg1[0]) * x3);
+  x20 = ((fiat_25519_uint128)(arg1[0]) * x6);
+  x21 = ((fiat_25519_uint128)(arg1[0]) * x7);
+  x22 = ((fiat_25519_uint128)(arg1[0]) * x8);
+  x23 = ((fiat_25519_uint128)(arg1[0]) * (arg1[0]));
+  x24 = (x23 + (x15 + x13));
+  x25 = (uint64_t)(x24 >> 51);
+  x26 = (uint64_t)(x24 & UINT64_C(0x7ffffffffffff));
+  x27 = (x19 + (x16 + x14));
+  x28 = (x20 + (x17 + x9));
+  x29 = (x21 + (x18 + x10));
+  x30 = (x22 + (x12 + x11));
+  x31 = (x25 + x30);
+  x32 = (uint64_t)(x31 >> 51);
+  x33 = (uint64_t)(x31 & UINT64_C(0x7ffffffffffff));
+  x34 = (x32 + x29);
+  x35 = (uint64_t)(x34 >> 51);
+  x36 = (uint64_t)(x34 & UINT64_C(0x7ffffffffffff));
+  x37 = (x35 + x28);
+  x38 = (uint64_t)(x37 >> 51);
+  x39 = (uint64_t)(x37 & UINT64_C(0x7ffffffffffff));
+  x40 = (x38 + x27);
+  x41 = (uint64_t)(x40 >> 51);
+  x42 = (uint64_t)(x40 & UINT64_C(0x7ffffffffffff));
+  x43 = (x41 * UINT8_C(0x13));
+  x44 = (x26 + x43);
+  x45 = (x44 >> 51);
+  x46 = (x44 & UINT64_C(0x7ffffffffffff));
+  x47 = (x45 + x33);
+  x48 = (fiat_25519_uint1)(x47 >> 51);
+  x49 = (x47 & UINT64_C(0x7ffffffffffff));
+  x50 = (x48 + x36);
   out1[0] = x46;
   out1[1] = x49;
   out1[2] = x50;
@@ -229,27 +359,36 @@
 
 /*
  * The function fiat_25519_carry reduces a field element.
+ *
  * Postconditions:
  *   eval out1 mod m = eval arg1 mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
  */
-static void fiat_25519_carry(uint64_t out1[5], const uint64_t arg1[5]) {
-  uint64_t x1 = (arg1[0]);
-  uint64_t x2 = ((x1 >> 51) + (arg1[1]));
-  uint64_t x3 = ((x2 >> 51) + (arg1[2]));
-  uint64_t x4 = ((x3 >> 51) + (arg1[3]));
-  uint64_t x5 = ((x4 >> 51) + (arg1[4]));
-  uint64_t x6 = ((x1 & UINT64_C(0x7ffffffffffff)) + ((x5 >> 51) * UINT8_C(0x13)));
-  uint64_t x7 = ((fiat_25519_uint1)(x6 >> 51) + (x2 & UINT64_C(0x7ffffffffffff)));
-  uint64_t x8 = (x6 & UINT64_C(0x7ffffffffffff));
-  uint64_t x9 = (x7 & UINT64_C(0x7ffffffffffff));
-  uint64_t x10 = ((fiat_25519_uint1)(x7 >> 51) + (x3 & UINT64_C(0x7ffffffffffff)));
-  uint64_t x11 = (x4 & UINT64_C(0x7ffffffffffff));
-  uint64_t x12 = (x5 & UINT64_C(0x7ffffffffffff));
+static FIAT_25519_FIAT_INLINE void fiat_25519_carry(fiat_25519_tight_field_element out1, const fiat_25519_loose_field_element arg1) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
+  uint64_t x5;
+  uint64_t x6;
+  uint64_t x7;
+  uint64_t x8;
+  uint64_t x9;
+  uint64_t x10;
+  uint64_t x11;
+  uint64_t x12;
+  x1 = (arg1[0]);
+  x2 = ((x1 >> 51) + (arg1[1]));
+  x3 = ((x2 >> 51) + (arg1[2]));
+  x4 = ((x3 >> 51) + (arg1[3]));
+  x5 = ((x4 >> 51) + (arg1[4]));
+  x6 = ((x1 & UINT64_C(0x7ffffffffffff)) + ((x5 >> 51) * UINT8_C(0x13)));
+  x7 = ((fiat_25519_uint1)(x6 >> 51) + (x2 & UINT64_C(0x7ffffffffffff)));
+  x8 = (x6 & UINT64_C(0x7ffffffffffff));
+  x9 = (x7 & UINT64_C(0x7ffffffffffff));
+  x10 = ((fiat_25519_uint1)(x7 >> 51) + (x3 & UINT64_C(0x7ffffffffffff)));
+  x11 = (x4 & UINT64_C(0x7ffffffffffff));
+  x12 = (x5 & UINT64_C(0x7ffffffffffff));
   out1[0] = x8;
   out1[1] = x9;
   out1[2] = x10;
@@ -259,21 +398,22 @@
 
 /*
  * The function fiat_25519_add adds two field elements.
+ *
  * Postconditions:
  *   eval out1 mod m = (eval arg1 + eval arg2) mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
- *   arg2: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664]]
  */
-static void fiat_25519_add(uint64_t out1[5], const uint64_t arg1[5], const uint64_t arg2[5]) {
-  uint64_t x1 = ((arg1[0]) + (arg2[0]));
-  uint64_t x2 = ((arg1[1]) + (arg2[1]));
-  uint64_t x3 = ((arg1[2]) + (arg2[2]));
-  uint64_t x4 = ((arg1[3]) + (arg2[3]));
-  uint64_t x5 = ((arg1[4]) + (arg2[4]));
+static FIAT_25519_FIAT_INLINE void fiat_25519_add(fiat_25519_loose_field_element out1, const fiat_25519_tight_field_element arg1, const fiat_25519_tight_field_element arg2) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
+  uint64_t x5;
+  x1 = ((arg1[0]) + (arg2[0]));
+  x2 = ((arg1[1]) + (arg2[1]));
+  x3 = ((arg1[2]) + (arg2[2]));
+  x4 = ((arg1[3]) + (arg2[3]));
+  x5 = ((arg1[4]) + (arg2[4]));
   out1[0] = x1;
   out1[1] = x2;
   out1[2] = x3;
@@ -283,21 +423,22 @@
 
 /*
  * The function fiat_25519_sub subtracts two field elements.
+ *
  * Postconditions:
  *   eval out1 mod m = (eval arg1 - eval arg2) mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
- *   arg2: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664]]
  */
-static void fiat_25519_sub(uint64_t out1[5], const uint64_t arg1[5], const uint64_t arg2[5]) {
-  uint64_t x1 = ((UINT64_C(0xfffffffffffda) + (arg1[0])) - (arg2[0]));
-  uint64_t x2 = ((UINT64_C(0xffffffffffffe) + (arg1[1])) - (arg2[1]));
-  uint64_t x3 = ((UINT64_C(0xffffffffffffe) + (arg1[2])) - (arg2[2]));
-  uint64_t x4 = ((UINT64_C(0xffffffffffffe) + (arg1[3])) - (arg2[3]));
-  uint64_t x5 = ((UINT64_C(0xffffffffffffe) + (arg1[4])) - (arg2[4]));
+static FIAT_25519_FIAT_INLINE void fiat_25519_sub(fiat_25519_loose_field_element out1, const fiat_25519_tight_field_element arg1, const fiat_25519_tight_field_element arg2) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
+  uint64_t x5;
+  x1 = ((UINT64_C(0xfffffffffffda) + (arg1[0])) - (arg2[0]));
+  x2 = ((UINT64_C(0xffffffffffffe) + (arg1[1])) - (arg2[1]));
+  x3 = ((UINT64_C(0xffffffffffffe) + (arg1[2])) - (arg2[2]));
+  x4 = ((UINT64_C(0xffffffffffffe) + (arg1[3])) - (arg2[3]));
+  x5 = ((UINT64_C(0xffffffffffffe) + (arg1[4])) - (arg2[4]));
   out1[0] = x1;
   out1[1] = x2;
   out1[2] = x3;
@@ -307,20 +448,22 @@
 
 /*
  * The function fiat_25519_opp negates a field element.
+ *
  * Postconditions:
  *   eval out1 mod m = -eval arg1 mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664]]
  */
-static void fiat_25519_opp(uint64_t out1[5], const uint64_t arg1[5]) {
-  uint64_t x1 = (UINT64_C(0xfffffffffffda) - (arg1[0]));
-  uint64_t x2 = (UINT64_C(0xffffffffffffe) - (arg1[1]));
-  uint64_t x3 = (UINT64_C(0xffffffffffffe) - (arg1[2]));
-  uint64_t x4 = (UINT64_C(0xffffffffffffe) - (arg1[3]));
-  uint64_t x5 = (UINT64_C(0xffffffffffffe) - (arg1[4]));
+static FIAT_25519_FIAT_INLINE void fiat_25519_opp(fiat_25519_loose_field_element out1, const fiat_25519_tight_field_element arg1) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
+  uint64_t x5;
+  x1 = (UINT64_C(0xfffffffffffda) - (arg1[0]));
+  x2 = (UINT64_C(0xffffffffffffe) - (arg1[1]));
+  x3 = (UINT64_C(0xffffffffffffe) - (arg1[2]));
+  x4 = (UINT64_C(0xffffffffffffe) - (arg1[3]));
+  x5 = (UINT64_C(0xffffffffffffe) - (arg1[4]));
   out1[0] = x1;
   out1[1] = x2;
   out1[2] = x3;
@@ -330,6 +473,7 @@
 
 /*
  * The function fiat_25519_selectznz is a multi-limb conditional select.
+ *
  * Postconditions:
  *   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
  *
@@ -340,16 +484,16 @@
  * Output Bounds:
  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
  */
-static void fiat_25519_selectznz(uint64_t out1[5], fiat_25519_uint1 arg1, const uint64_t arg2[5], const uint64_t arg3[5]) {
+static FIAT_25519_FIAT_INLINE void fiat_25519_selectznz(uint64_t out1[5], fiat_25519_uint1 arg1, const uint64_t arg2[5], const uint64_t arg3[5]) {
   uint64_t x1;
-  fiat_25519_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0]));
   uint64_t x2;
-  fiat_25519_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1]));
   uint64_t x3;
-  fiat_25519_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2]));
   uint64_t x4;
-  fiat_25519_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3]));
   uint64_t x5;
+  fiat_25519_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0]));
+  fiat_25519_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1]));
+  fiat_25519_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2]));
+  fiat_25519_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3]));
   fiat_25519_cmovznz_u64(&x5, arg1, (arg2[4]), (arg3[4]));
   out1[0] = x1;
   out1[1] = x2;
@@ -360,260 +504,469 @@
 
 /*
  * The function fiat_25519_to_bytes serializes a field element to bytes in little-endian order.
+ *
  * Postconditions:
  *   out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31]
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
  * Output Bounds:
  *   out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x7f]]
  */
-static void fiat_25519_to_bytes(uint8_t out1[32], const uint64_t arg1[5]) {
+static FIAT_25519_FIAT_INLINE void fiat_25519_to_bytes(uint8_t out1[32], const fiat_25519_tight_field_element arg1) {
   uint64_t x1;
   fiat_25519_uint1 x2;
-  fiat_25519_subborrowx_u51(&x1, &x2, 0x0, (arg1[0]), UINT64_C(0x7ffffffffffed));
   uint64_t x3;
   fiat_25519_uint1 x4;
-  fiat_25519_subborrowx_u51(&x3, &x4, x2, (arg1[1]), UINT64_C(0x7ffffffffffff));
   uint64_t x5;
   fiat_25519_uint1 x6;
-  fiat_25519_subborrowx_u51(&x5, &x6, x4, (arg1[2]), UINT64_C(0x7ffffffffffff));
   uint64_t x7;
   fiat_25519_uint1 x8;
-  fiat_25519_subborrowx_u51(&x7, &x8, x6, (arg1[3]), UINT64_C(0x7ffffffffffff));
   uint64_t x9;
   fiat_25519_uint1 x10;
-  fiat_25519_subborrowx_u51(&x9, &x10, x8, (arg1[4]), UINT64_C(0x7ffffffffffff));
   uint64_t x11;
-  fiat_25519_cmovznz_u64(&x11, x10, 0x0, UINT64_C(0xffffffffffffffff));
   uint64_t x12;
   fiat_25519_uint1 x13;
-  fiat_25519_addcarryx_u51(&x12, &x13, 0x0, x1, (x11 & UINT64_C(0x7ffffffffffed)));
   uint64_t x14;
   fiat_25519_uint1 x15;
-  fiat_25519_addcarryx_u51(&x14, &x15, x13, x3, (x11 & UINT64_C(0x7ffffffffffff)));
   uint64_t x16;
   fiat_25519_uint1 x17;
-  fiat_25519_addcarryx_u51(&x16, &x17, x15, x5, (x11 & UINT64_C(0x7ffffffffffff)));
   uint64_t x18;
   fiat_25519_uint1 x19;
-  fiat_25519_addcarryx_u51(&x18, &x19, x17, x7, (x11 & UINT64_C(0x7ffffffffffff)));
   uint64_t x20;
   fiat_25519_uint1 x21;
+  uint64_t x22;
+  uint64_t x23;
+  uint64_t x24;
+  uint64_t x25;
+  uint8_t x26;
+  uint64_t x27;
+  uint8_t x28;
+  uint64_t x29;
+  uint8_t x30;
+  uint64_t x31;
+  uint8_t x32;
+  uint64_t x33;
+  uint8_t x34;
+  uint64_t x35;
+  uint8_t x36;
+  uint8_t x37;
+  uint64_t x38;
+  uint8_t x39;
+  uint64_t x40;
+  uint8_t x41;
+  uint64_t x42;
+  uint8_t x43;
+  uint64_t x44;
+  uint8_t x45;
+  uint64_t x46;
+  uint8_t x47;
+  uint64_t x48;
+  uint8_t x49;
+  uint8_t x50;
+  uint64_t x51;
+  uint8_t x52;
+  uint64_t x53;
+  uint8_t x54;
+  uint64_t x55;
+  uint8_t x56;
+  uint64_t x57;
+  uint8_t x58;
+  uint64_t x59;
+  uint8_t x60;
+  uint64_t x61;
+  uint8_t x62;
+  uint64_t x63;
+  uint8_t x64;
+  fiat_25519_uint1 x65;
+  uint64_t x66;
+  uint8_t x67;
+  uint64_t x68;
+  uint8_t x69;
+  uint64_t x70;
+  uint8_t x71;
+  uint64_t x72;
+  uint8_t x73;
+  uint64_t x74;
+  uint8_t x75;
+  uint64_t x76;
+  uint8_t x77;
+  uint8_t x78;
+  uint64_t x79;
+  uint8_t x80;
+  uint64_t x81;
+  uint8_t x82;
+  uint64_t x83;
+  uint8_t x84;
+  uint64_t x85;
+  uint8_t x86;
+  uint64_t x87;
+  uint8_t x88;
+  uint64_t x89;
+  uint8_t x90;
+  uint8_t x91;
+  fiat_25519_subborrowx_u51(&x1, &x2, 0x0, (arg1[0]), UINT64_C(0x7ffffffffffed));
+  fiat_25519_subborrowx_u51(&x3, &x4, x2, (arg1[1]), UINT64_C(0x7ffffffffffff));
+  fiat_25519_subborrowx_u51(&x5, &x6, x4, (arg1[2]), UINT64_C(0x7ffffffffffff));
+  fiat_25519_subborrowx_u51(&x7, &x8, x6, (arg1[3]), UINT64_C(0x7ffffffffffff));
+  fiat_25519_subborrowx_u51(&x9, &x10, x8, (arg1[4]), UINT64_C(0x7ffffffffffff));
+  fiat_25519_cmovznz_u64(&x11, x10, 0x0, UINT64_C(0xffffffffffffffff));
+  fiat_25519_addcarryx_u51(&x12, &x13, 0x0, x1, (x11 & UINT64_C(0x7ffffffffffed)));
+  fiat_25519_addcarryx_u51(&x14, &x15, x13, x3, (x11 & UINT64_C(0x7ffffffffffff)));
+  fiat_25519_addcarryx_u51(&x16, &x17, x15, x5, (x11 & UINT64_C(0x7ffffffffffff)));
+  fiat_25519_addcarryx_u51(&x18, &x19, x17, x7, (x11 & UINT64_C(0x7ffffffffffff)));
   fiat_25519_addcarryx_u51(&x20, &x21, x19, x9, (x11 & UINT64_C(0x7ffffffffffff)));
-  uint64_t x22 = (x20 << 4);
-  uint64_t x23 = (x18 * (uint64_t)0x2);
-  uint64_t x24 = (x16 << 6);
-  uint64_t x25 = (x14 << 3);
-  uint64_t x26 = (x12 >> 8);
-  uint8_t x27 = (uint8_t)(x12 & UINT8_C(0xff));
-  uint64_t x28 = (x26 >> 8);
-  uint8_t x29 = (uint8_t)(x26 & UINT8_C(0xff));
-  uint64_t x30 = (x28 >> 8);
-  uint8_t x31 = (uint8_t)(x28 & UINT8_C(0xff));
-  uint64_t x32 = (x30 >> 8);
-  uint8_t x33 = (uint8_t)(x30 & UINT8_C(0xff));
-  uint64_t x34 = (x32 >> 8);
-  uint8_t x35 = (uint8_t)(x32 & UINT8_C(0xff));
-  uint8_t x36 = (uint8_t)(x34 >> 8);
-  uint8_t x37 = (uint8_t)(x34 & UINT8_C(0xff));
-  uint64_t x38 = (x36 + x25);
-  uint64_t x39 = (x38 >> 8);
-  uint8_t x40 = (uint8_t)(x38 & UINT8_C(0xff));
-  uint64_t x41 = (x39 >> 8);
-  uint8_t x42 = (uint8_t)(x39 & UINT8_C(0xff));
-  uint64_t x43 = (x41 >> 8);
-  uint8_t x44 = (uint8_t)(x41 & UINT8_C(0xff));
-  uint64_t x45 = (x43 >> 8);
-  uint8_t x46 = (uint8_t)(x43 & UINT8_C(0xff));
-  uint64_t x47 = (x45 >> 8);
-  uint8_t x48 = (uint8_t)(x45 & UINT8_C(0xff));
-  uint8_t x49 = (uint8_t)(x47 >> 8);
-  uint8_t x50 = (uint8_t)(x47 & UINT8_C(0xff));
-  uint64_t x51 = (x49 + x24);
-  uint64_t x52 = (x51 >> 8);
-  uint8_t x53 = (uint8_t)(x51 & UINT8_C(0xff));
-  uint64_t x54 = (x52 >> 8);
-  uint8_t x55 = (uint8_t)(x52 & UINT8_C(0xff));
-  uint64_t x56 = (x54 >> 8);
-  uint8_t x57 = (uint8_t)(x54 & UINT8_C(0xff));
-  uint64_t x58 = (x56 >> 8);
-  uint8_t x59 = (uint8_t)(x56 & UINT8_C(0xff));
-  uint64_t x60 = (x58 >> 8);
-  uint8_t x61 = (uint8_t)(x58 & UINT8_C(0xff));
-  uint64_t x62 = (x60 >> 8);
-  uint8_t x63 = (uint8_t)(x60 & UINT8_C(0xff));
-  fiat_25519_uint1 x64 = (fiat_25519_uint1)(x62 >> 8);
-  uint8_t x65 = (uint8_t)(x62 & UINT8_C(0xff));
-  uint64_t x66 = (x64 + x23);
-  uint64_t x67 = (x66 >> 8);
-  uint8_t x68 = (uint8_t)(x66 & UINT8_C(0xff));
-  uint64_t x69 = (x67 >> 8);
-  uint8_t x70 = (uint8_t)(x67 & UINT8_C(0xff));
-  uint64_t x71 = (x69 >> 8);
-  uint8_t x72 = (uint8_t)(x69 & UINT8_C(0xff));
-  uint64_t x73 = (x71 >> 8);
-  uint8_t x74 = (uint8_t)(x71 & UINT8_C(0xff));
-  uint64_t x75 = (x73 >> 8);
-  uint8_t x76 = (uint8_t)(x73 & UINT8_C(0xff));
-  uint8_t x77 = (uint8_t)(x75 >> 8);
-  uint8_t x78 = (uint8_t)(x75 & UINT8_C(0xff));
-  uint64_t x79 = (x77 + x22);
-  uint64_t x80 = (x79 >> 8);
-  uint8_t x81 = (uint8_t)(x79 & UINT8_C(0xff));
-  uint64_t x82 = (x80 >> 8);
-  uint8_t x83 = (uint8_t)(x80 & UINT8_C(0xff));
-  uint64_t x84 = (x82 >> 8);
-  uint8_t x85 = (uint8_t)(x82 & UINT8_C(0xff));
-  uint64_t x86 = (x84 >> 8);
-  uint8_t x87 = (uint8_t)(x84 & UINT8_C(0xff));
-  uint64_t x88 = (x86 >> 8);
-  uint8_t x89 = (uint8_t)(x86 & UINT8_C(0xff));
-  uint8_t x90 = (uint8_t)(x88 >> 8);
-  uint8_t x91 = (uint8_t)(x88 & UINT8_C(0xff));
-  out1[0] = x27;
-  out1[1] = x29;
-  out1[2] = x31;
-  out1[3] = x33;
-  out1[4] = x35;
-  out1[5] = x37;
-  out1[6] = x40;
-  out1[7] = x42;
-  out1[8] = x44;
-  out1[9] = x46;
-  out1[10] = x48;
-  out1[11] = x50;
-  out1[12] = x53;
-  out1[13] = x55;
-  out1[14] = x57;
-  out1[15] = x59;
-  out1[16] = x61;
-  out1[17] = x63;
-  out1[18] = x65;
-  out1[19] = x68;
-  out1[20] = x70;
-  out1[21] = x72;
-  out1[22] = x74;
-  out1[23] = x76;
-  out1[24] = x78;
-  out1[25] = x81;
-  out1[26] = x83;
-  out1[27] = x85;
-  out1[28] = x87;
-  out1[29] = x89;
-  out1[30] = x91;
-  out1[31] = x90;
+  x22 = (x20 << 4);
+  x23 = (x18 * (uint64_t)0x2);
+  x24 = (x16 << 6);
+  x25 = (x14 << 3);
+  x26 = (uint8_t)(x12 & UINT8_C(0xff));
+  x27 = (x12 >> 8);
+  x28 = (uint8_t)(x27 & UINT8_C(0xff));
+  x29 = (x27 >> 8);
+  x30 = (uint8_t)(x29 & UINT8_C(0xff));
+  x31 = (x29 >> 8);
+  x32 = (uint8_t)(x31 & UINT8_C(0xff));
+  x33 = (x31 >> 8);
+  x34 = (uint8_t)(x33 & UINT8_C(0xff));
+  x35 = (x33 >> 8);
+  x36 = (uint8_t)(x35 & UINT8_C(0xff));
+  x37 = (uint8_t)(x35 >> 8);
+  x38 = (x25 + (uint64_t)x37);
+  x39 = (uint8_t)(x38 & UINT8_C(0xff));
+  x40 = (x38 >> 8);
+  x41 = (uint8_t)(x40 & UINT8_C(0xff));
+  x42 = (x40 >> 8);
+  x43 = (uint8_t)(x42 & UINT8_C(0xff));
+  x44 = (x42 >> 8);
+  x45 = (uint8_t)(x44 & UINT8_C(0xff));
+  x46 = (x44 >> 8);
+  x47 = (uint8_t)(x46 & UINT8_C(0xff));
+  x48 = (x46 >> 8);
+  x49 = (uint8_t)(x48 & UINT8_C(0xff));
+  x50 = (uint8_t)(x48 >> 8);
+  x51 = (x24 + (uint64_t)x50);
+  x52 = (uint8_t)(x51 & UINT8_C(0xff));
+  x53 = (x51 >> 8);
+  x54 = (uint8_t)(x53 & UINT8_C(0xff));
+  x55 = (x53 >> 8);
+  x56 = (uint8_t)(x55 & UINT8_C(0xff));
+  x57 = (x55 >> 8);
+  x58 = (uint8_t)(x57 & UINT8_C(0xff));
+  x59 = (x57 >> 8);
+  x60 = (uint8_t)(x59 & UINT8_C(0xff));
+  x61 = (x59 >> 8);
+  x62 = (uint8_t)(x61 & UINT8_C(0xff));
+  x63 = (x61 >> 8);
+  x64 = (uint8_t)(x63 & UINT8_C(0xff));
+  x65 = (fiat_25519_uint1)(x63 >> 8);
+  x66 = (x23 + (uint64_t)x65);
+  x67 = (uint8_t)(x66 & UINT8_C(0xff));
+  x68 = (x66 >> 8);
+  x69 = (uint8_t)(x68 & UINT8_C(0xff));
+  x70 = (x68 >> 8);
+  x71 = (uint8_t)(x70 & UINT8_C(0xff));
+  x72 = (x70 >> 8);
+  x73 = (uint8_t)(x72 & UINT8_C(0xff));
+  x74 = (x72 >> 8);
+  x75 = (uint8_t)(x74 & UINT8_C(0xff));
+  x76 = (x74 >> 8);
+  x77 = (uint8_t)(x76 & UINT8_C(0xff));
+  x78 = (uint8_t)(x76 >> 8);
+  x79 = (x22 + (uint64_t)x78);
+  x80 = (uint8_t)(x79 & UINT8_C(0xff));
+  x81 = (x79 >> 8);
+  x82 = (uint8_t)(x81 & UINT8_C(0xff));
+  x83 = (x81 >> 8);
+  x84 = (uint8_t)(x83 & UINT8_C(0xff));
+  x85 = (x83 >> 8);
+  x86 = (uint8_t)(x85 & UINT8_C(0xff));
+  x87 = (x85 >> 8);
+  x88 = (uint8_t)(x87 & UINT8_C(0xff));
+  x89 = (x87 >> 8);
+  x90 = (uint8_t)(x89 & UINT8_C(0xff));
+  x91 = (uint8_t)(x89 >> 8);
+  out1[0] = x26;
+  out1[1] = x28;
+  out1[2] = x30;
+  out1[3] = x32;
+  out1[4] = x34;
+  out1[5] = x36;
+  out1[6] = x39;
+  out1[7] = x41;
+  out1[8] = x43;
+  out1[9] = x45;
+  out1[10] = x47;
+  out1[11] = x49;
+  out1[12] = x52;
+  out1[13] = x54;
+  out1[14] = x56;
+  out1[15] = x58;
+  out1[16] = x60;
+  out1[17] = x62;
+  out1[18] = x64;
+  out1[19] = x67;
+  out1[20] = x69;
+  out1[21] = x71;
+  out1[22] = x73;
+  out1[23] = x75;
+  out1[24] = x77;
+  out1[25] = x80;
+  out1[26] = x82;
+  out1[27] = x84;
+  out1[28] = x86;
+  out1[29] = x88;
+  out1[30] = x90;
+  out1[31] = x91;
 }
 
 /*
  * The function fiat_25519_from_bytes deserializes a field element from bytes in little-endian order.
+ *
  * Postconditions:
  *   eval out1 mod m = bytes_eval arg1 mod m
  *
  * Input Bounds:
  *   arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x7f]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
  */
-static void fiat_25519_from_bytes(uint64_t out1[5], const uint8_t arg1[32]) {
-  uint64_t x1 = ((uint64_t)(arg1[31]) << 44);
-  uint64_t x2 = ((uint64_t)(arg1[30]) << 36);
-  uint64_t x3 = ((uint64_t)(arg1[29]) << 28);
-  uint64_t x4 = ((uint64_t)(arg1[28]) << 20);
-  uint64_t x5 = ((uint64_t)(arg1[27]) << 12);
-  uint64_t x6 = ((uint64_t)(arg1[26]) << 4);
-  uint64_t x7 = ((uint64_t)(arg1[25]) << 47);
-  uint64_t x8 = ((uint64_t)(arg1[24]) << 39);
-  uint64_t x9 = ((uint64_t)(arg1[23]) << 31);
-  uint64_t x10 = ((uint64_t)(arg1[22]) << 23);
-  uint64_t x11 = ((uint64_t)(arg1[21]) << 15);
-  uint64_t x12 = ((uint64_t)(arg1[20]) << 7);
-  uint64_t x13 = ((uint64_t)(arg1[19]) << 50);
-  uint64_t x14 = ((uint64_t)(arg1[18]) << 42);
-  uint64_t x15 = ((uint64_t)(arg1[17]) << 34);
-  uint64_t x16 = ((uint64_t)(arg1[16]) << 26);
-  uint64_t x17 = ((uint64_t)(arg1[15]) << 18);
-  uint64_t x18 = ((uint64_t)(arg1[14]) << 10);
-  uint64_t x19 = ((uint64_t)(arg1[13]) << 2);
-  uint64_t x20 = ((uint64_t)(arg1[12]) << 45);
-  uint64_t x21 = ((uint64_t)(arg1[11]) << 37);
-  uint64_t x22 = ((uint64_t)(arg1[10]) << 29);
-  uint64_t x23 = ((uint64_t)(arg1[9]) << 21);
-  uint64_t x24 = ((uint64_t)(arg1[8]) << 13);
-  uint64_t x25 = ((uint64_t)(arg1[7]) << 5);
-  uint64_t x26 = ((uint64_t)(arg1[6]) << 48);
-  uint64_t x27 = ((uint64_t)(arg1[5]) << 40);
-  uint64_t x28 = ((uint64_t)(arg1[4]) << 32);
-  uint64_t x29 = ((uint64_t)(arg1[3]) << 24);
-  uint64_t x30 = ((uint64_t)(arg1[2]) << 16);
-  uint64_t x31 = ((uint64_t)(arg1[1]) << 8);
-  uint8_t x32 = (arg1[0]);
-  uint64_t x33 = (x32 + (x31 + (x30 + (x29 + (x28 + (x27 + x26))))));
-  uint8_t x34 = (uint8_t)(x33 >> 51);
-  uint64_t x35 = (x33 & UINT64_C(0x7ffffffffffff));
-  uint64_t x36 = (x6 + (x5 + (x4 + (x3 + (x2 + x1)))));
-  uint64_t x37 = (x12 + (x11 + (x10 + (x9 + (x8 + x7)))));
-  uint64_t x38 = (x19 + (x18 + (x17 + (x16 + (x15 + (x14 + x13))))));
-  uint64_t x39 = (x25 + (x24 + (x23 + (x22 + (x21 + x20)))));
-  uint64_t x40 = (x34 + x39);
-  uint8_t x41 = (uint8_t)(x40 >> 51);
-  uint64_t x42 = (x40 & UINT64_C(0x7ffffffffffff));
-  uint64_t x43 = (x41 + x38);
-  uint8_t x44 = (uint8_t)(x43 >> 51);
-  uint64_t x45 = (x43 & UINT64_C(0x7ffffffffffff));
-  uint64_t x46 = (x44 + x37);
-  uint8_t x47 = (uint8_t)(x46 >> 51);
-  uint64_t x48 = (x46 & UINT64_C(0x7ffffffffffff));
-  uint64_t x49 = (x47 + x36);
-  out1[0] = x35;
-  out1[1] = x42;
-  out1[2] = x45;
-  out1[3] = x48;
-  out1[4] = x49;
+static FIAT_25519_FIAT_INLINE void fiat_25519_from_bytes(fiat_25519_tight_field_element out1, const uint8_t arg1[32]) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
+  uint64_t x5;
+  uint64_t x6;
+  uint64_t x7;
+  uint64_t x8;
+  uint64_t x9;
+  uint64_t x10;
+  uint64_t x11;
+  uint64_t x12;
+  uint64_t x13;
+  uint64_t x14;
+  uint64_t x15;
+  uint64_t x16;
+  uint64_t x17;
+  uint64_t x18;
+  uint64_t x19;
+  uint64_t x20;
+  uint64_t x21;
+  uint64_t x22;
+  uint64_t x23;
+  uint64_t x24;
+  uint64_t x25;
+  uint64_t x26;
+  uint64_t x27;
+  uint64_t x28;
+  uint64_t x29;
+  uint64_t x30;
+  uint64_t x31;
+  uint8_t x32;
+  uint64_t x33;
+  uint64_t x34;
+  uint64_t x35;
+  uint64_t x36;
+  uint64_t x37;
+  uint64_t x38;
+  uint64_t x39;
+  uint8_t x40;
+  uint64_t x41;
+  uint64_t x42;
+  uint64_t x43;
+  uint64_t x44;
+  uint64_t x45;
+  uint64_t x46;
+  uint64_t x47;
+  uint8_t x48;
+  uint64_t x49;
+  uint64_t x50;
+  uint64_t x51;
+  uint64_t x52;
+  uint64_t x53;
+  uint64_t x54;
+  uint64_t x55;
+  uint64_t x56;
+  uint8_t x57;
+  uint64_t x58;
+  uint64_t x59;
+  uint64_t x60;
+  uint64_t x61;
+  uint64_t x62;
+  uint64_t x63;
+  uint64_t x64;
+  uint8_t x65;
+  uint64_t x66;
+  uint64_t x67;
+  uint64_t x68;
+  uint64_t x69;
+  uint64_t x70;
+  uint64_t x71;
+  x1 = ((uint64_t)(arg1[31]) << 44);
+  x2 = ((uint64_t)(arg1[30]) << 36);
+  x3 = ((uint64_t)(arg1[29]) << 28);
+  x4 = ((uint64_t)(arg1[28]) << 20);
+  x5 = ((uint64_t)(arg1[27]) << 12);
+  x6 = ((uint64_t)(arg1[26]) << 4);
+  x7 = ((uint64_t)(arg1[25]) << 47);
+  x8 = ((uint64_t)(arg1[24]) << 39);
+  x9 = ((uint64_t)(arg1[23]) << 31);
+  x10 = ((uint64_t)(arg1[22]) << 23);
+  x11 = ((uint64_t)(arg1[21]) << 15);
+  x12 = ((uint64_t)(arg1[20]) << 7);
+  x13 = ((uint64_t)(arg1[19]) << 50);
+  x14 = ((uint64_t)(arg1[18]) << 42);
+  x15 = ((uint64_t)(arg1[17]) << 34);
+  x16 = ((uint64_t)(arg1[16]) << 26);
+  x17 = ((uint64_t)(arg1[15]) << 18);
+  x18 = ((uint64_t)(arg1[14]) << 10);
+  x19 = ((uint64_t)(arg1[13]) << 2);
+  x20 = ((uint64_t)(arg1[12]) << 45);
+  x21 = ((uint64_t)(arg1[11]) << 37);
+  x22 = ((uint64_t)(arg1[10]) << 29);
+  x23 = ((uint64_t)(arg1[9]) << 21);
+  x24 = ((uint64_t)(arg1[8]) << 13);
+  x25 = ((uint64_t)(arg1[7]) << 5);
+  x26 = ((uint64_t)(arg1[6]) << 48);
+  x27 = ((uint64_t)(arg1[5]) << 40);
+  x28 = ((uint64_t)(arg1[4]) << 32);
+  x29 = ((uint64_t)(arg1[3]) << 24);
+  x30 = ((uint64_t)(arg1[2]) << 16);
+  x31 = ((uint64_t)(arg1[1]) << 8);
+  x32 = (arg1[0]);
+  x33 = (x31 + (uint64_t)x32);
+  x34 = (x30 + x33);
+  x35 = (x29 + x34);
+  x36 = (x28 + x35);
+  x37 = (x27 + x36);
+  x38 = (x26 + x37);
+  x39 = (x38 & UINT64_C(0x7ffffffffffff));
+  x40 = (uint8_t)(x38 >> 51);
+  x41 = (x25 + (uint64_t)x40);
+  x42 = (x24 + x41);
+  x43 = (x23 + x42);
+  x44 = (x22 + x43);
+  x45 = (x21 + x44);
+  x46 = (x20 + x45);
+  x47 = (x46 & UINT64_C(0x7ffffffffffff));
+  x48 = (uint8_t)(x46 >> 51);
+  x49 = (x19 + (uint64_t)x48);
+  x50 = (x18 + x49);
+  x51 = (x17 + x50);
+  x52 = (x16 + x51);
+  x53 = (x15 + x52);
+  x54 = (x14 + x53);
+  x55 = (x13 + x54);
+  x56 = (x55 & UINT64_C(0x7ffffffffffff));
+  x57 = (uint8_t)(x55 >> 51);
+  x58 = (x12 + (uint64_t)x57);
+  x59 = (x11 + x58);
+  x60 = (x10 + x59);
+  x61 = (x9 + x60);
+  x62 = (x8 + x61);
+  x63 = (x7 + x62);
+  x64 = (x63 & UINT64_C(0x7ffffffffffff));
+  x65 = (uint8_t)(x63 >> 51);
+  x66 = (x6 + (uint64_t)x65);
+  x67 = (x5 + x66);
+  x68 = (x4 + x67);
+  x69 = (x3 + x68);
+  x70 = (x2 + x69);
+  x71 = (x1 + x70);
+  out1[0] = x39;
+  out1[1] = x47;
+  out1[2] = x56;
+  out1[3] = x64;
+  out1[4] = x71;
+}
+
+/*
+ * The function fiat_25519_relax is the identity function converting from tight field elements to loose field elements.
+ *
+ * Postconditions:
+ *   out1 = arg1
+ *
+ */
+static FIAT_25519_FIAT_INLINE void fiat_25519_relax(fiat_25519_loose_field_element out1, const fiat_25519_tight_field_element arg1) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
+  uint64_t x5;
+  x1 = (arg1[0]);
+  x2 = (arg1[1]);
+  x3 = (arg1[2]);
+  x4 = (arg1[3]);
+  x5 = (arg1[4]);
+  out1[0] = x1;
+  out1[1] = x2;
+  out1[2] = x3;
+  out1[3] = x4;
+  out1[4] = x5;
 }
 
 /*
  * The function fiat_25519_carry_scmul_121666 multiplies a field element by 121666 and reduces the result.
+ *
  * Postconditions:
  *   eval out1 mod m = (121666 * eval arg1) mod m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664], [0x0 ~> 0x1a666666666664]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc], [0x0 ~> 0x8cccccccccccc]]
  */
-static void fiat_25519_carry_scmul_121666(uint64_t out1[5], const uint64_t arg1[5]) {
-  fiat_25519_uint128 x1 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[4]));
-  fiat_25519_uint128 x2 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[3]));
-  fiat_25519_uint128 x3 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[2]));
-  fiat_25519_uint128 x4 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[1]));
-  fiat_25519_uint128 x5 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[0]));
-  uint64_t x6 = (uint64_t)(x5 >> 51);
-  uint64_t x7 = (uint64_t)(x5 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint128 x8 = (x6 + x4);
-  uint64_t x9 = (uint64_t)(x8 >> 51);
-  uint64_t x10 = (uint64_t)(x8 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint128 x11 = (x9 + x3);
-  uint64_t x12 = (uint64_t)(x11 >> 51);
-  uint64_t x13 = (uint64_t)(x11 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint128 x14 = (x12 + x2);
-  uint64_t x15 = (uint64_t)(x14 >> 51);
-  uint64_t x16 = (uint64_t)(x14 & UINT64_C(0x7ffffffffffff));
-  fiat_25519_uint128 x17 = (x15 + x1);
-  uint64_t x18 = (uint64_t)(x17 >> 51);
-  uint64_t x19 = (uint64_t)(x17 & UINT64_C(0x7ffffffffffff));
-  uint64_t x20 = (x18 * UINT8_C(0x13));
-  uint64_t x21 = (x7 + x20);
-  fiat_25519_uint1 x22 = (fiat_25519_uint1)(x21 >> 51);
-  uint64_t x23 = (x21 & UINT64_C(0x7ffffffffffff));
-  uint64_t x24 = (x22 + x10);
-  fiat_25519_uint1 x25 = (fiat_25519_uint1)(x24 >> 51);
-  uint64_t x26 = (x24 & UINT64_C(0x7ffffffffffff));
-  uint64_t x27 = (x25 + x13);
+static FIAT_25519_FIAT_INLINE void fiat_25519_carry_scmul_121666(fiat_25519_tight_field_element out1, const fiat_25519_loose_field_element arg1) {
+  fiat_25519_uint128 x1;
+  fiat_25519_uint128 x2;
+  fiat_25519_uint128 x3;
+  fiat_25519_uint128 x4;
+  fiat_25519_uint128 x5;
+  uint64_t x6;
+  uint64_t x7;
+  fiat_25519_uint128 x8;
+  uint64_t x9;
+  uint64_t x10;
+  fiat_25519_uint128 x11;
+  uint64_t x12;
+  uint64_t x13;
+  fiat_25519_uint128 x14;
+  uint64_t x15;
+  uint64_t x16;
+  fiat_25519_uint128 x17;
+  uint64_t x18;
+  uint64_t x19;
+  uint64_t x20;
+  uint64_t x21;
+  fiat_25519_uint1 x22;
+  uint64_t x23;
+  uint64_t x24;
+  fiat_25519_uint1 x25;
+  uint64_t x26;
+  uint64_t x27;
+  x1 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[4]));
+  x2 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[3]));
+  x3 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[2]));
+  x4 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[1]));
+  x5 = ((fiat_25519_uint128)UINT32_C(0x1db42) * (arg1[0]));
+  x6 = (uint64_t)(x5 >> 51);
+  x7 = (uint64_t)(x5 & UINT64_C(0x7ffffffffffff));
+  x8 = (x6 + x4);
+  x9 = (uint64_t)(x8 >> 51);
+  x10 = (uint64_t)(x8 & UINT64_C(0x7ffffffffffff));
+  x11 = (x9 + x3);
+  x12 = (uint64_t)(x11 >> 51);
+  x13 = (uint64_t)(x11 & UINT64_C(0x7ffffffffffff));
+  x14 = (x12 + x2);
+  x15 = (uint64_t)(x14 >> 51);
+  x16 = (uint64_t)(x14 & UINT64_C(0x7ffffffffffff));
+  x17 = (x15 + x1);
+  x18 = (uint64_t)(x17 >> 51);
+  x19 = (uint64_t)(x17 & UINT64_C(0x7ffffffffffff));
+  x20 = (x18 * UINT8_C(0x13));
+  x21 = (x7 + x20);
+  x22 = (fiat_25519_uint1)(x21 >> 51);
+  x23 = (x21 & UINT64_C(0x7ffffffffffff));
+  x24 = (x22 + x10);
+  x25 = (fiat_25519_uint1)(x24 >> 51);
+  x26 = (x24 & UINT64_C(0x7ffffffffffff));
+  x27 = (x25 + x13);
   out1[0] = x23;
   out1[1] = x26;
   out1[2] = x27;
   out1[3] = x16;
   out1[4] = x19;
 }
-
diff --git a/src/third_party/fiat/p256_32.h b/src/third_party/fiat/p256_32.h
index 504da42..3812d8c 100644
--- a/src/third_party/fiat/p256_32.h
+++ b/src/third_party/fiat/p256_32.h
@@ -1,8 +1,8 @@
-/* Autogenerated: src/ExtractionOCaml/word_by_word_montgomery --static p256 '2^256 - 2^224 + 2^192 + 2^96 - 1' 32 mul square add sub opp from_montgomery nonzero selectznz to_bytes from_bytes */
+/* Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' --inline --static --use-value-barrier p256 32 '2^256 - 2^224 + 2^192 + 2^96 - 1' mul square add sub opp from_montgomery to_montgomery nonzero selectznz to_bytes from_bytes one msat divstep divstep_precomp */
 /* curve description: p256 */
-/* requested operations: mul, square, add, sub, opp, from_montgomery, nonzero, selectznz, to_bytes, from_bytes */
-/* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */
 /* machine_wordsize = 32 (from "32") */
+/* requested operations: mul, square, add, sub, opp, from_montgomery, to_montgomery, nonzero, selectznz, to_bytes, from_bytes, one, msat, divstep, divstep_precomp */
+/* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */
 /*                                                                    */
 /* NOTE: In addition to the bounds specified above each function, all */
 /*   functions synthesized for this Montgomery arithmetic require the */
@@ -10,18 +10,47 @@
 /*   require the input to be in the unique saturated representation.  */
 /*   All functions also ensure that these two properties are true of  */
 /*   return values.                                                   */
+/*  */
+/* Computed values: */
+/*   eval z = z[0] + (z[1] << 32) + (z[2] << 64) + (z[3] << 96) + (z[4] << 128) + (z[5] << 160) + (z[6] << 192) + (z[7] << 224) */
+/*   bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) */
+/*   twos_complement_eval z = let x1 := z[0] + (z[1] << 32) + (z[2] << 64) + (z[3] << 96) + (z[4] << 128) + (z[5] << 160) + (z[6] << 192) + (z[7] << 224) in */
+/*                            if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256 */
 
 #include <stdint.h>
 typedef unsigned char fiat_p256_uint1;
 typedef signed char fiat_p256_int1;
+#if defined(__GNUC__) || defined(__clang__)
+#  define FIAT_P256_FIAT_INLINE __inline__
+#else
+#  define FIAT_P256_FIAT_INLINE
+#endif
+
+/* The type fiat_p256_montgomery_domain_field_element is a field element in the Montgomery domain. */
+/* Bounds: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */
+typedef uint32_t fiat_p256_montgomery_domain_field_element[8];
+
+/* The type fiat_p256_non_montgomery_domain_field_element is a field element NOT in the Montgomery domain. */
+/* Bounds: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */
+typedef uint32_t fiat_p256_non_montgomery_domain_field_element[8];
 
 #if (-1 & 3) != 3
 #error "This code only works on a two's complement system"
 #endif
 
+#if !defined(FIAT_P256_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
+static __inline__ uint32_t fiat_p256_value_barrier_u32(uint32_t a) {
+  __asm__("" : "+r"(a) : /* no inputs */);
+  return a;
+}
+#else
+#  define fiat_p256_value_barrier_u32(x) (x)
+#endif
+
 
 /*
  * The function fiat_p256_addcarryx_u32 is an addition with carry.
+ *
  * Postconditions:
  *   out1 = (arg1 + arg2 + arg3) mod 2^32
  *   out2 = ⌊(arg1 + arg2 + arg3) / 2^32⌋
@@ -34,16 +63,20 @@
  *   out1: [0x0 ~> 0xffffffff]
  *   out2: [0x0 ~> 0x1]
  */
-static void fiat_p256_addcarryx_u32(uint32_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint32_t arg2, uint32_t arg3) {
-  uint64_t x1 = ((arg1 + (uint64_t)arg2) + arg3);
-  uint32_t x2 = (uint32_t)(x1 & UINT32_C(0xffffffff));
-  fiat_p256_uint1 x3 = (fiat_p256_uint1)(x1 >> 32);
+static FIAT_P256_FIAT_INLINE void fiat_p256_addcarryx_u32(uint32_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint32_t arg2, uint32_t arg3) {
+  uint64_t x1;
+  uint32_t x2;
+  fiat_p256_uint1 x3;
+  x1 = ((arg1 + (uint64_t)arg2) + arg3);
+  x2 = (uint32_t)(x1 & UINT32_C(0xffffffff));
+  x3 = (fiat_p256_uint1)(x1 >> 32);
   *out1 = x2;
   *out2 = x3;
 }
 
 /*
  * The function fiat_p256_subborrowx_u32 is a subtraction with borrow.
+ *
  * Postconditions:
  *   out1 = (-arg1 + arg2 + -arg3) mod 2^32
  *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^32⌋
@@ -56,16 +89,20 @@
  *   out1: [0x0 ~> 0xffffffff]
  *   out2: [0x0 ~> 0x1]
  */
-static void fiat_p256_subborrowx_u32(uint32_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint32_t arg2, uint32_t arg3) {
-  int64_t x1 = ((arg2 - (int64_t)arg1) - arg3);
-  fiat_p256_int1 x2 = (fiat_p256_int1)(x1 >> 32);
-  uint32_t x3 = (uint32_t)(x1 & UINT32_C(0xffffffff));
+static FIAT_P256_FIAT_INLINE void fiat_p256_subborrowx_u32(uint32_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint32_t arg2, uint32_t arg3) {
+  int64_t x1;
+  fiat_p256_int1 x2;
+  uint32_t x3;
+  x1 = ((arg2 - (int64_t)arg1) - arg3);
+  x2 = (fiat_p256_int1)(x1 >> 32);
+  x3 = (uint32_t)(x1 & UINT32_C(0xffffffff));
   *out1 = x3;
   *out2 = (fiat_p256_uint1)(0x0 - x2);
 }
 
 /*
  * The function fiat_p256_mulx_u32 is a multiplication, returning the full double-width result.
+ *
  * Postconditions:
  *   out1 = (arg1 * arg2) mod 2^32
  *   out2 = ⌊arg1 * arg2 / 2^32⌋
@@ -77,16 +114,20 @@
  *   out1: [0x0 ~> 0xffffffff]
  *   out2: [0x0 ~> 0xffffffff]
  */
-static void fiat_p256_mulx_u32(uint32_t* out1, uint32_t* out2, uint32_t arg1, uint32_t arg2) {
-  uint64_t x1 = ((uint64_t)arg1 * arg2);
-  uint32_t x2 = (uint32_t)(x1 & UINT32_C(0xffffffff));
-  uint32_t x3 = (uint32_t)(x1 >> 32);
+static FIAT_P256_FIAT_INLINE void fiat_p256_mulx_u32(uint32_t* out1, uint32_t* out2, uint32_t arg1, uint32_t arg2) {
+  uint64_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  x1 = ((uint64_t)arg1 * arg2);
+  x2 = (uint32_t)(x1 & UINT32_C(0xffffffff));
+  x3 = (uint32_t)(x1 >> 32);
   *out1 = x2;
   *out2 = x3;
 }
 
 /*
  * The function fiat_p256_cmovznz_u32 is a single-word conditional move.
+ *
  * Postconditions:
  *   out1 = (if arg1 = 0 then arg2 else arg3)
  *
@@ -97,21 +138,19 @@
  * Output Bounds:
  *   out1: [0x0 ~> 0xffffffff]
  */
-static void fiat_p256_cmovznz_u32(uint32_t* out1, fiat_p256_uint1 arg1, uint32_t arg2, uint32_t arg3) {
-  fiat_p256_uint1 x1 = (!(!arg1));
-  uint32_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT32_C(0xffffffff));
-  // Note this line has been patched from the synthesized code to add value
-  // barriers.
-  //
-  // Clang recognizes this pattern as a select. While it usually transforms it
-  // to a cmov, it sometimes further transforms it into a branch, which we do
-  // not want.
-  uint32_t x3 = ((value_barrier_u32(x2) & arg3) | (value_barrier_u32(~x2) & arg2));
+static FIAT_P256_FIAT_INLINE void fiat_p256_cmovznz_u32(uint32_t* out1, fiat_p256_uint1 arg1, uint32_t arg2, uint32_t arg3) {
+  fiat_p256_uint1 x1;
+  uint32_t x2;
+  uint32_t x3;
+  x1 = (!(!arg1));
+  x2 = ((fiat_p256_int1)(0x0 - x1) & UINT32_C(0xffffffff));
+  x3 = ((fiat_p256_value_barrier_u32(x2) & arg3) | (fiat_p256_value_barrier_u32((~x2)) & arg2));
   *out1 = x3;
 }
 
 /*
  * The function fiat_p256_mul multiplies two field elements in the Montgomery domain.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  *   0 ≤ eval arg2 < m
@@ -119,995 +158,1021 @@
  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
  *   0 ≤ eval out1 < m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
- *   arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
  */
-static void fiat_p256_mul(uint32_t out1[8], const uint32_t arg1[8], const uint32_t arg2[8]) {
-  uint32_t x1 = (arg1[1]);
-  uint32_t x2 = (arg1[2]);
-  uint32_t x3 = (arg1[3]);
-  uint32_t x4 = (arg1[4]);
-  uint32_t x5 = (arg1[5]);
-  uint32_t x6 = (arg1[6]);
-  uint32_t x7 = (arg1[7]);
-  uint32_t x8 = (arg1[0]);
+static FIAT_P256_FIAT_INLINE void fiat_p256_mul(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
+  uint32_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  uint32_t x4;
+  uint32_t x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint32_t x8;
   uint32_t x9;
   uint32_t x10;
-  fiat_p256_mulx_u32(&x9, &x10, x8, (arg2[7]));
   uint32_t x11;
   uint32_t x12;
-  fiat_p256_mulx_u32(&x11, &x12, x8, (arg2[6]));
   uint32_t x13;
   uint32_t x14;
-  fiat_p256_mulx_u32(&x13, &x14, x8, (arg2[5]));
   uint32_t x15;
   uint32_t x16;
-  fiat_p256_mulx_u32(&x15, &x16, x8, (arg2[4]));
   uint32_t x17;
   uint32_t x18;
-  fiat_p256_mulx_u32(&x17, &x18, x8, (arg2[3]));
   uint32_t x19;
   uint32_t x20;
-  fiat_p256_mulx_u32(&x19, &x20, x8, (arg2[2]));
   uint32_t x21;
   uint32_t x22;
-  fiat_p256_mulx_u32(&x21, &x22, x8, (arg2[1]));
   uint32_t x23;
   uint32_t x24;
-  fiat_p256_mulx_u32(&x23, &x24, x8, (arg2[0]));
   uint32_t x25;
   fiat_p256_uint1 x26;
-  fiat_p256_addcarryx_u32(&x25, &x26, 0x0, x24, x21);
   uint32_t x27;
   fiat_p256_uint1 x28;
-  fiat_p256_addcarryx_u32(&x27, &x28, x26, x22, x19);
   uint32_t x29;
   fiat_p256_uint1 x30;
-  fiat_p256_addcarryx_u32(&x29, &x30, x28, x20, x17);
   uint32_t x31;
   fiat_p256_uint1 x32;
-  fiat_p256_addcarryx_u32(&x31, &x32, x30, x18, x15);
   uint32_t x33;
   fiat_p256_uint1 x34;
-  fiat_p256_addcarryx_u32(&x33, &x34, x32, x16, x13);
   uint32_t x35;
   fiat_p256_uint1 x36;
-  fiat_p256_addcarryx_u32(&x35, &x36, x34, x14, x11);
   uint32_t x37;
   fiat_p256_uint1 x38;
-  fiat_p256_addcarryx_u32(&x37, &x38, x36, x12, x9);
-  uint32_t x39 = (x38 + x10);
+  uint32_t x39;
   uint32_t x40;
   uint32_t x41;
-  fiat_p256_mulx_u32(&x40, &x41, x23, UINT32_C(0xffffffff));
   uint32_t x42;
   uint32_t x43;
-  fiat_p256_mulx_u32(&x42, &x43, x23, UINT32_C(0xffffffff));
   uint32_t x44;
   uint32_t x45;
-  fiat_p256_mulx_u32(&x44, &x45, x23, UINT32_C(0xffffffff));
   uint32_t x46;
   uint32_t x47;
-  fiat_p256_mulx_u32(&x46, &x47, x23, UINT32_C(0xffffffff));
   uint32_t x48;
   fiat_p256_uint1 x49;
-  fiat_p256_addcarryx_u32(&x48, &x49, 0x0, x47, x44);
   uint32_t x50;
   fiat_p256_uint1 x51;
-  fiat_p256_addcarryx_u32(&x50, &x51, x49, x45, x42);
-  uint32_t x52 = (x51 + x43);
+  uint32_t x52;
   uint32_t x53;
   fiat_p256_uint1 x54;
-  fiat_p256_addcarryx_u32(&x53, &x54, 0x0, x23, x46);
   uint32_t x55;
   fiat_p256_uint1 x56;
-  fiat_p256_addcarryx_u32(&x55, &x56, x54, x25, x48);
   uint32_t x57;
   fiat_p256_uint1 x58;
-  fiat_p256_addcarryx_u32(&x57, &x58, x56, x27, x50);
   uint32_t x59;
   fiat_p256_uint1 x60;
-  fiat_p256_addcarryx_u32(&x59, &x60, x58, x29, x52);
   uint32_t x61;
   fiat_p256_uint1 x62;
-  fiat_p256_addcarryx_u32(&x61, &x62, x60, x31, 0x0);
   uint32_t x63;
   fiat_p256_uint1 x64;
-  fiat_p256_addcarryx_u32(&x63, &x64, x62, x33, 0x0);
   uint32_t x65;
   fiat_p256_uint1 x66;
-  fiat_p256_addcarryx_u32(&x65, &x66, x64, x35, x23);
   uint32_t x67;
   fiat_p256_uint1 x68;
-  fiat_p256_addcarryx_u32(&x67, &x68, x66, x37, x40);
   uint32_t x69;
   fiat_p256_uint1 x70;
-  fiat_p256_addcarryx_u32(&x69, &x70, x68, x39, x41);
   uint32_t x71;
   uint32_t x72;
-  fiat_p256_mulx_u32(&x71, &x72, x1, (arg2[7]));
   uint32_t x73;
   uint32_t x74;
-  fiat_p256_mulx_u32(&x73, &x74, x1, (arg2[6]));
   uint32_t x75;
   uint32_t x76;
-  fiat_p256_mulx_u32(&x75, &x76, x1, (arg2[5]));
   uint32_t x77;
   uint32_t x78;
-  fiat_p256_mulx_u32(&x77, &x78, x1, (arg2[4]));
   uint32_t x79;
   uint32_t x80;
-  fiat_p256_mulx_u32(&x79, &x80, x1, (arg2[3]));
   uint32_t x81;
   uint32_t x82;
-  fiat_p256_mulx_u32(&x81, &x82, x1, (arg2[2]));
   uint32_t x83;
   uint32_t x84;
-  fiat_p256_mulx_u32(&x83, &x84, x1, (arg2[1]));
   uint32_t x85;
   uint32_t x86;
-  fiat_p256_mulx_u32(&x85, &x86, x1, (arg2[0]));
   uint32_t x87;
   fiat_p256_uint1 x88;
-  fiat_p256_addcarryx_u32(&x87, &x88, 0x0, x86, x83);
   uint32_t x89;
   fiat_p256_uint1 x90;
-  fiat_p256_addcarryx_u32(&x89, &x90, x88, x84, x81);
   uint32_t x91;
   fiat_p256_uint1 x92;
-  fiat_p256_addcarryx_u32(&x91, &x92, x90, x82, x79);
   uint32_t x93;
   fiat_p256_uint1 x94;
-  fiat_p256_addcarryx_u32(&x93, &x94, x92, x80, x77);
   uint32_t x95;
   fiat_p256_uint1 x96;
-  fiat_p256_addcarryx_u32(&x95, &x96, x94, x78, x75);
   uint32_t x97;
   fiat_p256_uint1 x98;
-  fiat_p256_addcarryx_u32(&x97, &x98, x96, x76, x73);
   uint32_t x99;
   fiat_p256_uint1 x100;
-  fiat_p256_addcarryx_u32(&x99, &x100, x98, x74, x71);
-  uint32_t x101 = (x100 + x72);
+  uint32_t x101;
   uint32_t x102;
   fiat_p256_uint1 x103;
-  fiat_p256_addcarryx_u32(&x102, &x103, 0x0, x55, x85);
   uint32_t x104;
   fiat_p256_uint1 x105;
-  fiat_p256_addcarryx_u32(&x104, &x105, x103, x57, x87);
   uint32_t x106;
   fiat_p256_uint1 x107;
-  fiat_p256_addcarryx_u32(&x106, &x107, x105, x59, x89);
   uint32_t x108;
   fiat_p256_uint1 x109;
-  fiat_p256_addcarryx_u32(&x108, &x109, x107, x61, x91);
   uint32_t x110;
   fiat_p256_uint1 x111;
-  fiat_p256_addcarryx_u32(&x110, &x111, x109, x63, x93);
   uint32_t x112;
   fiat_p256_uint1 x113;
-  fiat_p256_addcarryx_u32(&x112, &x113, x111, x65, x95);
   uint32_t x114;
   fiat_p256_uint1 x115;
-  fiat_p256_addcarryx_u32(&x114, &x115, x113, x67, x97);
   uint32_t x116;
   fiat_p256_uint1 x117;
-  fiat_p256_addcarryx_u32(&x116, &x117, x115, x69, x99);
   uint32_t x118;
   fiat_p256_uint1 x119;
-  fiat_p256_addcarryx_u32(&x118, &x119, x117, x70, x101);
   uint32_t x120;
   uint32_t x121;
-  fiat_p256_mulx_u32(&x120, &x121, x102, UINT32_C(0xffffffff));
   uint32_t x122;
   uint32_t x123;
-  fiat_p256_mulx_u32(&x122, &x123, x102, UINT32_C(0xffffffff));
   uint32_t x124;
   uint32_t x125;
-  fiat_p256_mulx_u32(&x124, &x125, x102, UINT32_C(0xffffffff));
   uint32_t x126;
   uint32_t x127;
-  fiat_p256_mulx_u32(&x126, &x127, x102, UINT32_C(0xffffffff));
   uint32_t x128;
   fiat_p256_uint1 x129;
-  fiat_p256_addcarryx_u32(&x128, &x129, 0x0, x127, x124);
   uint32_t x130;
   fiat_p256_uint1 x131;
-  fiat_p256_addcarryx_u32(&x130, &x131, x129, x125, x122);
-  uint32_t x132 = (x131 + x123);
+  uint32_t x132;
   uint32_t x133;
   fiat_p256_uint1 x134;
-  fiat_p256_addcarryx_u32(&x133, &x134, 0x0, x102, x126);
   uint32_t x135;
   fiat_p256_uint1 x136;
-  fiat_p256_addcarryx_u32(&x135, &x136, x134, x104, x128);
   uint32_t x137;
   fiat_p256_uint1 x138;
-  fiat_p256_addcarryx_u32(&x137, &x138, x136, x106, x130);
   uint32_t x139;
   fiat_p256_uint1 x140;
-  fiat_p256_addcarryx_u32(&x139, &x140, x138, x108, x132);
   uint32_t x141;
   fiat_p256_uint1 x142;
-  fiat_p256_addcarryx_u32(&x141, &x142, x140, x110, 0x0);
   uint32_t x143;
   fiat_p256_uint1 x144;
-  fiat_p256_addcarryx_u32(&x143, &x144, x142, x112, 0x0);
   uint32_t x145;
   fiat_p256_uint1 x146;
-  fiat_p256_addcarryx_u32(&x145, &x146, x144, x114, x102);
   uint32_t x147;
   fiat_p256_uint1 x148;
-  fiat_p256_addcarryx_u32(&x147, &x148, x146, x116, x120);
   uint32_t x149;
   fiat_p256_uint1 x150;
-  fiat_p256_addcarryx_u32(&x149, &x150, x148, x118, x121);
-  uint32_t x151 = ((uint32_t)x150 + x119);
+  uint32_t x151;
   uint32_t x152;
   uint32_t x153;
-  fiat_p256_mulx_u32(&x152, &x153, x2, (arg2[7]));
   uint32_t x154;
   uint32_t x155;
-  fiat_p256_mulx_u32(&x154, &x155, x2, (arg2[6]));
   uint32_t x156;
   uint32_t x157;
-  fiat_p256_mulx_u32(&x156, &x157, x2, (arg2[5]));
   uint32_t x158;
   uint32_t x159;
-  fiat_p256_mulx_u32(&x158, &x159, x2, (arg2[4]));
   uint32_t x160;
   uint32_t x161;
-  fiat_p256_mulx_u32(&x160, &x161, x2, (arg2[3]));
   uint32_t x162;
   uint32_t x163;
-  fiat_p256_mulx_u32(&x162, &x163, x2, (arg2[2]));
   uint32_t x164;
   uint32_t x165;
-  fiat_p256_mulx_u32(&x164, &x165, x2, (arg2[1]));
   uint32_t x166;
   uint32_t x167;
-  fiat_p256_mulx_u32(&x166, &x167, x2, (arg2[0]));
   uint32_t x168;
   fiat_p256_uint1 x169;
-  fiat_p256_addcarryx_u32(&x168, &x169, 0x0, x167, x164);
   uint32_t x170;
   fiat_p256_uint1 x171;
-  fiat_p256_addcarryx_u32(&x170, &x171, x169, x165, x162);
   uint32_t x172;
   fiat_p256_uint1 x173;
-  fiat_p256_addcarryx_u32(&x172, &x173, x171, x163, x160);
   uint32_t x174;
   fiat_p256_uint1 x175;
-  fiat_p256_addcarryx_u32(&x174, &x175, x173, x161, x158);
   uint32_t x176;
   fiat_p256_uint1 x177;
-  fiat_p256_addcarryx_u32(&x176, &x177, x175, x159, x156);
   uint32_t x178;
   fiat_p256_uint1 x179;
-  fiat_p256_addcarryx_u32(&x178, &x179, x177, x157, x154);
   uint32_t x180;
   fiat_p256_uint1 x181;
-  fiat_p256_addcarryx_u32(&x180, &x181, x179, x155, x152);
-  uint32_t x182 = (x181 + x153);
+  uint32_t x182;
   uint32_t x183;
   fiat_p256_uint1 x184;
-  fiat_p256_addcarryx_u32(&x183, &x184, 0x0, x135, x166);
   uint32_t x185;
   fiat_p256_uint1 x186;
-  fiat_p256_addcarryx_u32(&x185, &x186, x184, x137, x168);
   uint32_t x187;
   fiat_p256_uint1 x188;
-  fiat_p256_addcarryx_u32(&x187, &x188, x186, x139, x170);
   uint32_t x189;
   fiat_p256_uint1 x190;
-  fiat_p256_addcarryx_u32(&x189, &x190, x188, x141, x172);
   uint32_t x191;
   fiat_p256_uint1 x192;
-  fiat_p256_addcarryx_u32(&x191, &x192, x190, x143, x174);
   uint32_t x193;
   fiat_p256_uint1 x194;
-  fiat_p256_addcarryx_u32(&x193, &x194, x192, x145, x176);
   uint32_t x195;
   fiat_p256_uint1 x196;
-  fiat_p256_addcarryx_u32(&x195, &x196, x194, x147, x178);
   uint32_t x197;
   fiat_p256_uint1 x198;
-  fiat_p256_addcarryx_u32(&x197, &x198, x196, x149, x180);
   uint32_t x199;
   fiat_p256_uint1 x200;
-  fiat_p256_addcarryx_u32(&x199, &x200, x198, x151, x182);
   uint32_t x201;
   uint32_t x202;
-  fiat_p256_mulx_u32(&x201, &x202, x183, UINT32_C(0xffffffff));
   uint32_t x203;
   uint32_t x204;
-  fiat_p256_mulx_u32(&x203, &x204, x183, UINT32_C(0xffffffff));
   uint32_t x205;
   uint32_t x206;
-  fiat_p256_mulx_u32(&x205, &x206, x183, UINT32_C(0xffffffff));
   uint32_t x207;
   uint32_t x208;
-  fiat_p256_mulx_u32(&x207, &x208, x183, UINT32_C(0xffffffff));
   uint32_t x209;
   fiat_p256_uint1 x210;
-  fiat_p256_addcarryx_u32(&x209, &x210, 0x0, x208, x205);
   uint32_t x211;
   fiat_p256_uint1 x212;
-  fiat_p256_addcarryx_u32(&x211, &x212, x210, x206, x203);
-  uint32_t x213 = (x212 + x204);
+  uint32_t x213;
   uint32_t x214;
   fiat_p256_uint1 x215;
-  fiat_p256_addcarryx_u32(&x214, &x215, 0x0, x183, x207);
   uint32_t x216;
   fiat_p256_uint1 x217;
-  fiat_p256_addcarryx_u32(&x216, &x217, x215, x185, x209);
   uint32_t x218;
   fiat_p256_uint1 x219;
-  fiat_p256_addcarryx_u32(&x218, &x219, x217, x187, x211);
   uint32_t x220;
   fiat_p256_uint1 x221;
-  fiat_p256_addcarryx_u32(&x220, &x221, x219, x189, x213);
   uint32_t x222;
   fiat_p256_uint1 x223;
-  fiat_p256_addcarryx_u32(&x222, &x223, x221, x191, 0x0);
   uint32_t x224;
   fiat_p256_uint1 x225;
-  fiat_p256_addcarryx_u32(&x224, &x225, x223, x193, 0x0);
   uint32_t x226;
   fiat_p256_uint1 x227;
-  fiat_p256_addcarryx_u32(&x226, &x227, x225, x195, x183);
   uint32_t x228;
   fiat_p256_uint1 x229;
-  fiat_p256_addcarryx_u32(&x228, &x229, x227, x197, x201);
   uint32_t x230;
   fiat_p256_uint1 x231;
-  fiat_p256_addcarryx_u32(&x230, &x231, x229, x199, x202);
-  uint32_t x232 = ((uint32_t)x231 + x200);
+  uint32_t x232;
   uint32_t x233;
   uint32_t x234;
-  fiat_p256_mulx_u32(&x233, &x234, x3, (arg2[7]));
   uint32_t x235;
   uint32_t x236;
-  fiat_p256_mulx_u32(&x235, &x236, x3, (arg2[6]));
   uint32_t x237;
   uint32_t x238;
-  fiat_p256_mulx_u32(&x237, &x238, x3, (arg2[5]));
   uint32_t x239;
   uint32_t x240;
-  fiat_p256_mulx_u32(&x239, &x240, x3, (arg2[4]));
   uint32_t x241;
   uint32_t x242;
-  fiat_p256_mulx_u32(&x241, &x242, x3, (arg2[3]));
   uint32_t x243;
   uint32_t x244;
-  fiat_p256_mulx_u32(&x243, &x244, x3, (arg2[2]));
   uint32_t x245;
   uint32_t x246;
-  fiat_p256_mulx_u32(&x245, &x246, x3, (arg2[1]));
   uint32_t x247;
   uint32_t x248;
-  fiat_p256_mulx_u32(&x247, &x248, x3, (arg2[0]));
   uint32_t x249;
   fiat_p256_uint1 x250;
-  fiat_p256_addcarryx_u32(&x249, &x250, 0x0, x248, x245);
   uint32_t x251;
   fiat_p256_uint1 x252;
-  fiat_p256_addcarryx_u32(&x251, &x252, x250, x246, x243);
   uint32_t x253;
   fiat_p256_uint1 x254;
-  fiat_p256_addcarryx_u32(&x253, &x254, x252, x244, x241);
   uint32_t x255;
   fiat_p256_uint1 x256;
-  fiat_p256_addcarryx_u32(&x255, &x256, x254, x242, x239);
   uint32_t x257;
   fiat_p256_uint1 x258;
-  fiat_p256_addcarryx_u32(&x257, &x258, x256, x240, x237);
   uint32_t x259;
   fiat_p256_uint1 x260;
-  fiat_p256_addcarryx_u32(&x259, &x260, x258, x238, x235);
   uint32_t x261;
   fiat_p256_uint1 x262;
-  fiat_p256_addcarryx_u32(&x261, &x262, x260, x236, x233);
-  uint32_t x263 = (x262 + x234);
+  uint32_t x263;
   uint32_t x264;
   fiat_p256_uint1 x265;
-  fiat_p256_addcarryx_u32(&x264, &x265, 0x0, x216, x247);
   uint32_t x266;
   fiat_p256_uint1 x267;
-  fiat_p256_addcarryx_u32(&x266, &x267, x265, x218, x249);
   uint32_t x268;
   fiat_p256_uint1 x269;
-  fiat_p256_addcarryx_u32(&x268, &x269, x267, x220, x251);
   uint32_t x270;
   fiat_p256_uint1 x271;
-  fiat_p256_addcarryx_u32(&x270, &x271, x269, x222, x253);
   uint32_t x272;
   fiat_p256_uint1 x273;
-  fiat_p256_addcarryx_u32(&x272, &x273, x271, x224, x255);
   uint32_t x274;
   fiat_p256_uint1 x275;
-  fiat_p256_addcarryx_u32(&x274, &x275, x273, x226, x257);
   uint32_t x276;
   fiat_p256_uint1 x277;
-  fiat_p256_addcarryx_u32(&x276, &x277, x275, x228, x259);
   uint32_t x278;
   fiat_p256_uint1 x279;
-  fiat_p256_addcarryx_u32(&x278, &x279, x277, x230, x261);
   uint32_t x280;
   fiat_p256_uint1 x281;
-  fiat_p256_addcarryx_u32(&x280, &x281, x279, x232, x263);
   uint32_t x282;
   uint32_t x283;
-  fiat_p256_mulx_u32(&x282, &x283, x264, UINT32_C(0xffffffff));
   uint32_t x284;
   uint32_t x285;
-  fiat_p256_mulx_u32(&x284, &x285, x264, UINT32_C(0xffffffff));
   uint32_t x286;
   uint32_t x287;
-  fiat_p256_mulx_u32(&x286, &x287, x264, UINT32_C(0xffffffff));
   uint32_t x288;
   uint32_t x289;
-  fiat_p256_mulx_u32(&x288, &x289, x264, UINT32_C(0xffffffff));
   uint32_t x290;
   fiat_p256_uint1 x291;
-  fiat_p256_addcarryx_u32(&x290, &x291, 0x0, x289, x286);
   uint32_t x292;
   fiat_p256_uint1 x293;
-  fiat_p256_addcarryx_u32(&x292, &x293, x291, x287, x284);
-  uint32_t x294 = (x293 + x285);
+  uint32_t x294;
   uint32_t x295;
   fiat_p256_uint1 x296;
-  fiat_p256_addcarryx_u32(&x295, &x296, 0x0, x264, x288);
   uint32_t x297;
   fiat_p256_uint1 x298;
-  fiat_p256_addcarryx_u32(&x297, &x298, x296, x266, x290);
   uint32_t x299;
   fiat_p256_uint1 x300;
-  fiat_p256_addcarryx_u32(&x299, &x300, x298, x268, x292);
   uint32_t x301;
   fiat_p256_uint1 x302;
-  fiat_p256_addcarryx_u32(&x301, &x302, x300, x270, x294);
   uint32_t x303;
   fiat_p256_uint1 x304;
-  fiat_p256_addcarryx_u32(&x303, &x304, x302, x272, 0x0);
   uint32_t x305;
   fiat_p256_uint1 x306;
-  fiat_p256_addcarryx_u32(&x305, &x306, x304, x274, 0x0);
   uint32_t x307;
   fiat_p256_uint1 x308;
-  fiat_p256_addcarryx_u32(&x307, &x308, x306, x276, x264);
   uint32_t x309;
   fiat_p256_uint1 x310;
-  fiat_p256_addcarryx_u32(&x309, &x310, x308, x278, x282);
   uint32_t x311;
   fiat_p256_uint1 x312;
-  fiat_p256_addcarryx_u32(&x311, &x312, x310, x280, x283);
-  uint32_t x313 = ((uint32_t)x312 + x281);
+  uint32_t x313;
   uint32_t x314;
   uint32_t x315;
-  fiat_p256_mulx_u32(&x314, &x315, x4, (arg2[7]));
   uint32_t x316;
   uint32_t x317;
-  fiat_p256_mulx_u32(&x316, &x317, x4, (arg2[6]));
   uint32_t x318;
   uint32_t x319;
-  fiat_p256_mulx_u32(&x318, &x319, x4, (arg2[5]));
   uint32_t x320;
   uint32_t x321;
-  fiat_p256_mulx_u32(&x320, &x321, x4, (arg2[4]));
   uint32_t x322;
   uint32_t x323;
-  fiat_p256_mulx_u32(&x322, &x323, x4, (arg2[3]));
   uint32_t x324;
   uint32_t x325;
-  fiat_p256_mulx_u32(&x324, &x325, x4, (arg2[2]));
   uint32_t x326;
   uint32_t x327;
-  fiat_p256_mulx_u32(&x326, &x327, x4, (arg2[1]));
   uint32_t x328;
   uint32_t x329;
-  fiat_p256_mulx_u32(&x328, &x329, x4, (arg2[0]));
   uint32_t x330;
   fiat_p256_uint1 x331;
-  fiat_p256_addcarryx_u32(&x330, &x331, 0x0, x329, x326);
   uint32_t x332;
   fiat_p256_uint1 x333;
-  fiat_p256_addcarryx_u32(&x332, &x333, x331, x327, x324);
   uint32_t x334;
   fiat_p256_uint1 x335;
-  fiat_p256_addcarryx_u32(&x334, &x335, x333, x325, x322);
   uint32_t x336;
   fiat_p256_uint1 x337;
-  fiat_p256_addcarryx_u32(&x336, &x337, x335, x323, x320);
   uint32_t x338;
   fiat_p256_uint1 x339;
-  fiat_p256_addcarryx_u32(&x338, &x339, x337, x321, x318);
   uint32_t x340;
   fiat_p256_uint1 x341;
-  fiat_p256_addcarryx_u32(&x340, &x341, x339, x319, x316);
   uint32_t x342;
   fiat_p256_uint1 x343;
-  fiat_p256_addcarryx_u32(&x342, &x343, x341, x317, x314);
-  uint32_t x344 = (x343 + x315);
+  uint32_t x344;
   uint32_t x345;
   fiat_p256_uint1 x346;
-  fiat_p256_addcarryx_u32(&x345, &x346, 0x0, x297, x328);
   uint32_t x347;
   fiat_p256_uint1 x348;
-  fiat_p256_addcarryx_u32(&x347, &x348, x346, x299, x330);
   uint32_t x349;
   fiat_p256_uint1 x350;
-  fiat_p256_addcarryx_u32(&x349, &x350, x348, x301, x332);
   uint32_t x351;
   fiat_p256_uint1 x352;
-  fiat_p256_addcarryx_u32(&x351, &x352, x350, x303, x334);
   uint32_t x353;
   fiat_p256_uint1 x354;
-  fiat_p256_addcarryx_u32(&x353, &x354, x352, x305, x336);
   uint32_t x355;
   fiat_p256_uint1 x356;
-  fiat_p256_addcarryx_u32(&x355, &x356, x354, x307, x338);
   uint32_t x357;
   fiat_p256_uint1 x358;
-  fiat_p256_addcarryx_u32(&x357, &x358, x356, x309, x340);
   uint32_t x359;
   fiat_p256_uint1 x360;
-  fiat_p256_addcarryx_u32(&x359, &x360, x358, x311, x342);
   uint32_t x361;
   fiat_p256_uint1 x362;
-  fiat_p256_addcarryx_u32(&x361, &x362, x360, x313, x344);
   uint32_t x363;
   uint32_t x364;
-  fiat_p256_mulx_u32(&x363, &x364, x345, UINT32_C(0xffffffff));
   uint32_t x365;
   uint32_t x366;
-  fiat_p256_mulx_u32(&x365, &x366, x345, UINT32_C(0xffffffff));
   uint32_t x367;
   uint32_t x368;
-  fiat_p256_mulx_u32(&x367, &x368, x345, UINT32_C(0xffffffff));
   uint32_t x369;
   uint32_t x370;
-  fiat_p256_mulx_u32(&x369, &x370, x345, UINT32_C(0xffffffff));
   uint32_t x371;
   fiat_p256_uint1 x372;
-  fiat_p256_addcarryx_u32(&x371, &x372, 0x0, x370, x367);
   uint32_t x373;
   fiat_p256_uint1 x374;
-  fiat_p256_addcarryx_u32(&x373, &x374, x372, x368, x365);
-  uint32_t x375 = (x374 + x366);
+  uint32_t x375;
   uint32_t x376;
   fiat_p256_uint1 x377;
-  fiat_p256_addcarryx_u32(&x376, &x377, 0x0, x345, x369);
   uint32_t x378;
   fiat_p256_uint1 x379;
-  fiat_p256_addcarryx_u32(&x378, &x379, x377, x347, x371);
   uint32_t x380;
   fiat_p256_uint1 x381;
-  fiat_p256_addcarryx_u32(&x380, &x381, x379, x349, x373);
   uint32_t x382;
   fiat_p256_uint1 x383;
-  fiat_p256_addcarryx_u32(&x382, &x383, x381, x351, x375);
   uint32_t x384;
   fiat_p256_uint1 x385;
-  fiat_p256_addcarryx_u32(&x384, &x385, x383, x353, 0x0);
   uint32_t x386;
   fiat_p256_uint1 x387;
-  fiat_p256_addcarryx_u32(&x386, &x387, x385, x355, 0x0);
   uint32_t x388;
   fiat_p256_uint1 x389;
-  fiat_p256_addcarryx_u32(&x388, &x389, x387, x357, x345);
   uint32_t x390;
   fiat_p256_uint1 x391;
-  fiat_p256_addcarryx_u32(&x390, &x391, x389, x359, x363);
   uint32_t x392;
   fiat_p256_uint1 x393;
-  fiat_p256_addcarryx_u32(&x392, &x393, x391, x361, x364);
-  uint32_t x394 = ((uint32_t)x393 + x362);
+  uint32_t x394;
   uint32_t x395;
   uint32_t x396;
-  fiat_p256_mulx_u32(&x395, &x396, x5, (arg2[7]));
   uint32_t x397;
   uint32_t x398;
-  fiat_p256_mulx_u32(&x397, &x398, x5, (arg2[6]));
   uint32_t x399;
   uint32_t x400;
-  fiat_p256_mulx_u32(&x399, &x400, x5, (arg2[5]));
   uint32_t x401;
   uint32_t x402;
-  fiat_p256_mulx_u32(&x401, &x402, x5, (arg2[4]));
   uint32_t x403;
   uint32_t x404;
-  fiat_p256_mulx_u32(&x403, &x404, x5, (arg2[3]));
   uint32_t x405;
   uint32_t x406;
-  fiat_p256_mulx_u32(&x405, &x406, x5, (arg2[2]));
   uint32_t x407;
   uint32_t x408;
-  fiat_p256_mulx_u32(&x407, &x408, x5, (arg2[1]));
   uint32_t x409;
   uint32_t x410;
-  fiat_p256_mulx_u32(&x409, &x410, x5, (arg2[0]));
   uint32_t x411;
   fiat_p256_uint1 x412;
-  fiat_p256_addcarryx_u32(&x411, &x412, 0x0, x410, x407);
   uint32_t x413;
   fiat_p256_uint1 x414;
-  fiat_p256_addcarryx_u32(&x413, &x414, x412, x408, x405);
   uint32_t x415;
   fiat_p256_uint1 x416;
-  fiat_p256_addcarryx_u32(&x415, &x416, x414, x406, x403);
   uint32_t x417;
   fiat_p256_uint1 x418;
-  fiat_p256_addcarryx_u32(&x417, &x418, x416, x404, x401);
   uint32_t x419;
   fiat_p256_uint1 x420;
-  fiat_p256_addcarryx_u32(&x419, &x420, x418, x402, x399);
   uint32_t x421;
   fiat_p256_uint1 x422;
-  fiat_p256_addcarryx_u32(&x421, &x422, x420, x400, x397);
   uint32_t x423;
   fiat_p256_uint1 x424;
-  fiat_p256_addcarryx_u32(&x423, &x424, x422, x398, x395);
-  uint32_t x425 = (x424 + x396);
+  uint32_t x425;
   uint32_t x426;
   fiat_p256_uint1 x427;
-  fiat_p256_addcarryx_u32(&x426, &x427, 0x0, x378, x409);
   uint32_t x428;
   fiat_p256_uint1 x429;
-  fiat_p256_addcarryx_u32(&x428, &x429, x427, x380, x411);
   uint32_t x430;
   fiat_p256_uint1 x431;
-  fiat_p256_addcarryx_u32(&x430, &x431, x429, x382, x413);
   uint32_t x432;
   fiat_p256_uint1 x433;
-  fiat_p256_addcarryx_u32(&x432, &x433, x431, x384, x415);
   uint32_t x434;
   fiat_p256_uint1 x435;
-  fiat_p256_addcarryx_u32(&x434, &x435, x433, x386, x417);
   uint32_t x436;
   fiat_p256_uint1 x437;
-  fiat_p256_addcarryx_u32(&x436, &x437, x435, x388, x419);
   uint32_t x438;
   fiat_p256_uint1 x439;
-  fiat_p256_addcarryx_u32(&x438, &x439, x437, x390, x421);
   uint32_t x440;
   fiat_p256_uint1 x441;
-  fiat_p256_addcarryx_u32(&x440, &x441, x439, x392, x423);
   uint32_t x442;
   fiat_p256_uint1 x443;
-  fiat_p256_addcarryx_u32(&x442, &x443, x441, x394, x425);
   uint32_t x444;
   uint32_t x445;
-  fiat_p256_mulx_u32(&x444, &x445, x426, UINT32_C(0xffffffff));
   uint32_t x446;
   uint32_t x447;
-  fiat_p256_mulx_u32(&x446, &x447, x426, UINT32_C(0xffffffff));
   uint32_t x448;
   uint32_t x449;
-  fiat_p256_mulx_u32(&x448, &x449, x426, UINT32_C(0xffffffff));
   uint32_t x450;
   uint32_t x451;
-  fiat_p256_mulx_u32(&x450, &x451, x426, UINT32_C(0xffffffff));
   uint32_t x452;
   fiat_p256_uint1 x453;
-  fiat_p256_addcarryx_u32(&x452, &x453, 0x0, x451, x448);
   uint32_t x454;
   fiat_p256_uint1 x455;
-  fiat_p256_addcarryx_u32(&x454, &x455, x453, x449, x446);
-  uint32_t x456 = (x455 + x447);
+  uint32_t x456;
   uint32_t x457;
   fiat_p256_uint1 x458;
-  fiat_p256_addcarryx_u32(&x457, &x458, 0x0, x426, x450);
   uint32_t x459;
   fiat_p256_uint1 x460;
-  fiat_p256_addcarryx_u32(&x459, &x460, x458, x428, x452);
   uint32_t x461;
   fiat_p256_uint1 x462;
-  fiat_p256_addcarryx_u32(&x461, &x462, x460, x430, x454);
   uint32_t x463;
   fiat_p256_uint1 x464;
-  fiat_p256_addcarryx_u32(&x463, &x464, x462, x432, x456);
   uint32_t x465;
   fiat_p256_uint1 x466;
-  fiat_p256_addcarryx_u32(&x465, &x466, x464, x434, 0x0);
   uint32_t x467;
   fiat_p256_uint1 x468;
-  fiat_p256_addcarryx_u32(&x467, &x468, x466, x436, 0x0);
   uint32_t x469;
   fiat_p256_uint1 x470;
-  fiat_p256_addcarryx_u32(&x469, &x470, x468, x438, x426);
   uint32_t x471;
   fiat_p256_uint1 x472;
-  fiat_p256_addcarryx_u32(&x471, &x472, x470, x440, x444);
   uint32_t x473;
   fiat_p256_uint1 x474;
-  fiat_p256_addcarryx_u32(&x473, &x474, x472, x442, x445);
-  uint32_t x475 = ((uint32_t)x474 + x443);
+  uint32_t x475;
   uint32_t x476;
   uint32_t x477;
-  fiat_p256_mulx_u32(&x476, &x477, x6, (arg2[7]));
   uint32_t x478;
   uint32_t x479;
-  fiat_p256_mulx_u32(&x478, &x479, x6, (arg2[6]));
   uint32_t x480;
   uint32_t x481;
-  fiat_p256_mulx_u32(&x480, &x481, x6, (arg2[5]));
   uint32_t x482;
   uint32_t x483;
-  fiat_p256_mulx_u32(&x482, &x483, x6, (arg2[4]));
   uint32_t x484;
   uint32_t x485;
-  fiat_p256_mulx_u32(&x484, &x485, x6, (arg2[3]));
   uint32_t x486;
   uint32_t x487;
-  fiat_p256_mulx_u32(&x486, &x487, x6, (arg2[2]));
   uint32_t x488;
   uint32_t x489;
-  fiat_p256_mulx_u32(&x488, &x489, x6, (arg2[1]));
   uint32_t x490;
   uint32_t x491;
-  fiat_p256_mulx_u32(&x490, &x491, x6, (arg2[0]));
   uint32_t x492;
   fiat_p256_uint1 x493;
-  fiat_p256_addcarryx_u32(&x492, &x493, 0x0, x491, x488);
   uint32_t x494;
   fiat_p256_uint1 x495;
-  fiat_p256_addcarryx_u32(&x494, &x495, x493, x489, x486);
   uint32_t x496;
   fiat_p256_uint1 x497;
-  fiat_p256_addcarryx_u32(&x496, &x497, x495, x487, x484);
   uint32_t x498;
   fiat_p256_uint1 x499;
-  fiat_p256_addcarryx_u32(&x498, &x499, x497, x485, x482);
   uint32_t x500;
   fiat_p256_uint1 x501;
-  fiat_p256_addcarryx_u32(&x500, &x501, x499, x483, x480);
   uint32_t x502;
   fiat_p256_uint1 x503;
-  fiat_p256_addcarryx_u32(&x502, &x503, x501, x481, x478);
   uint32_t x504;
   fiat_p256_uint1 x505;
-  fiat_p256_addcarryx_u32(&x504, &x505, x503, x479, x476);
-  uint32_t x506 = (x505 + x477);
+  uint32_t x506;
   uint32_t x507;
   fiat_p256_uint1 x508;
-  fiat_p256_addcarryx_u32(&x507, &x508, 0x0, x459, x490);
   uint32_t x509;
   fiat_p256_uint1 x510;
-  fiat_p256_addcarryx_u32(&x509, &x510, x508, x461, x492);
   uint32_t x511;
   fiat_p256_uint1 x512;
-  fiat_p256_addcarryx_u32(&x511, &x512, x510, x463, x494);
   uint32_t x513;
   fiat_p256_uint1 x514;
-  fiat_p256_addcarryx_u32(&x513, &x514, x512, x465, x496);
   uint32_t x515;
   fiat_p256_uint1 x516;
-  fiat_p256_addcarryx_u32(&x515, &x516, x514, x467, x498);
   uint32_t x517;
   fiat_p256_uint1 x518;
-  fiat_p256_addcarryx_u32(&x517, &x518, x516, x469, x500);
   uint32_t x519;
   fiat_p256_uint1 x520;
-  fiat_p256_addcarryx_u32(&x519, &x520, x518, x471, x502);
   uint32_t x521;
   fiat_p256_uint1 x522;
-  fiat_p256_addcarryx_u32(&x521, &x522, x520, x473, x504);
   uint32_t x523;
   fiat_p256_uint1 x524;
-  fiat_p256_addcarryx_u32(&x523, &x524, x522, x475, x506);
   uint32_t x525;
   uint32_t x526;
-  fiat_p256_mulx_u32(&x525, &x526, x507, UINT32_C(0xffffffff));
   uint32_t x527;
   uint32_t x528;
-  fiat_p256_mulx_u32(&x527, &x528, x507, UINT32_C(0xffffffff));
   uint32_t x529;
   uint32_t x530;
-  fiat_p256_mulx_u32(&x529, &x530, x507, UINT32_C(0xffffffff));
   uint32_t x531;
   uint32_t x532;
-  fiat_p256_mulx_u32(&x531, &x532, x507, UINT32_C(0xffffffff));
   uint32_t x533;
   fiat_p256_uint1 x534;
-  fiat_p256_addcarryx_u32(&x533, &x534, 0x0, x532, x529);
   uint32_t x535;
   fiat_p256_uint1 x536;
-  fiat_p256_addcarryx_u32(&x535, &x536, x534, x530, x527);
-  uint32_t x537 = (x536 + x528);
+  uint32_t x537;
   uint32_t x538;
   fiat_p256_uint1 x539;
-  fiat_p256_addcarryx_u32(&x538, &x539, 0x0, x507, x531);
   uint32_t x540;
   fiat_p256_uint1 x541;
-  fiat_p256_addcarryx_u32(&x540, &x541, x539, x509, x533);
   uint32_t x542;
   fiat_p256_uint1 x543;
-  fiat_p256_addcarryx_u32(&x542, &x543, x541, x511, x535);
   uint32_t x544;
   fiat_p256_uint1 x545;
-  fiat_p256_addcarryx_u32(&x544, &x545, x543, x513, x537);
   uint32_t x546;
   fiat_p256_uint1 x547;
-  fiat_p256_addcarryx_u32(&x546, &x547, x545, x515, 0x0);
   uint32_t x548;
   fiat_p256_uint1 x549;
-  fiat_p256_addcarryx_u32(&x548, &x549, x547, x517, 0x0);
   uint32_t x550;
   fiat_p256_uint1 x551;
-  fiat_p256_addcarryx_u32(&x550, &x551, x549, x519, x507);
   uint32_t x552;
   fiat_p256_uint1 x553;
-  fiat_p256_addcarryx_u32(&x552, &x553, x551, x521, x525);
   uint32_t x554;
   fiat_p256_uint1 x555;
-  fiat_p256_addcarryx_u32(&x554, &x555, x553, x523, x526);
-  uint32_t x556 = ((uint32_t)x555 + x524);
+  uint32_t x556;
   uint32_t x557;
   uint32_t x558;
-  fiat_p256_mulx_u32(&x557, &x558, x7, (arg2[7]));
   uint32_t x559;
   uint32_t x560;
-  fiat_p256_mulx_u32(&x559, &x560, x7, (arg2[6]));
   uint32_t x561;
   uint32_t x562;
-  fiat_p256_mulx_u32(&x561, &x562, x7, (arg2[5]));
   uint32_t x563;
   uint32_t x564;
-  fiat_p256_mulx_u32(&x563, &x564, x7, (arg2[4]));
   uint32_t x565;
   uint32_t x566;
-  fiat_p256_mulx_u32(&x565, &x566, x7, (arg2[3]));
   uint32_t x567;
   uint32_t x568;
-  fiat_p256_mulx_u32(&x567, &x568, x7, (arg2[2]));
   uint32_t x569;
   uint32_t x570;
-  fiat_p256_mulx_u32(&x569, &x570, x7, (arg2[1]));
   uint32_t x571;
   uint32_t x572;
-  fiat_p256_mulx_u32(&x571, &x572, x7, (arg2[0]));
   uint32_t x573;
   fiat_p256_uint1 x574;
-  fiat_p256_addcarryx_u32(&x573, &x574, 0x0, x572, x569);
   uint32_t x575;
   fiat_p256_uint1 x576;
-  fiat_p256_addcarryx_u32(&x575, &x576, x574, x570, x567);
   uint32_t x577;
   fiat_p256_uint1 x578;
-  fiat_p256_addcarryx_u32(&x577, &x578, x576, x568, x565);
   uint32_t x579;
   fiat_p256_uint1 x580;
-  fiat_p256_addcarryx_u32(&x579, &x580, x578, x566, x563);
   uint32_t x581;
   fiat_p256_uint1 x582;
-  fiat_p256_addcarryx_u32(&x581, &x582, x580, x564, x561);
   uint32_t x583;
   fiat_p256_uint1 x584;
-  fiat_p256_addcarryx_u32(&x583, &x584, x582, x562, x559);
   uint32_t x585;
   fiat_p256_uint1 x586;
-  fiat_p256_addcarryx_u32(&x585, &x586, x584, x560, x557);
-  uint32_t x587 = (x586 + x558);
+  uint32_t x587;
   uint32_t x588;
   fiat_p256_uint1 x589;
-  fiat_p256_addcarryx_u32(&x588, &x589, 0x0, x540, x571);
   uint32_t x590;
   fiat_p256_uint1 x591;
-  fiat_p256_addcarryx_u32(&x590, &x591, x589, x542, x573);
   uint32_t x592;
   fiat_p256_uint1 x593;
-  fiat_p256_addcarryx_u32(&x592, &x593, x591, x544, x575);
   uint32_t x594;
   fiat_p256_uint1 x595;
-  fiat_p256_addcarryx_u32(&x594, &x595, x593, x546, x577);
   uint32_t x596;
   fiat_p256_uint1 x597;
-  fiat_p256_addcarryx_u32(&x596, &x597, x595, x548, x579);
   uint32_t x598;
   fiat_p256_uint1 x599;
-  fiat_p256_addcarryx_u32(&x598, &x599, x597, x550, x581);
   uint32_t x600;
   fiat_p256_uint1 x601;
-  fiat_p256_addcarryx_u32(&x600, &x601, x599, x552, x583);
   uint32_t x602;
   fiat_p256_uint1 x603;
-  fiat_p256_addcarryx_u32(&x602, &x603, x601, x554, x585);
   uint32_t x604;
   fiat_p256_uint1 x605;
-  fiat_p256_addcarryx_u32(&x604, &x605, x603, x556, x587);
   uint32_t x606;
   uint32_t x607;
-  fiat_p256_mulx_u32(&x606, &x607, x588, UINT32_C(0xffffffff));
   uint32_t x608;
   uint32_t x609;
-  fiat_p256_mulx_u32(&x608, &x609, x588, UINT32_C(0xffffffff));
   uint32_t x610;
   uint32_t x611;
-  fiat_p256_mulx_u32(&x610, &x611, x588, UINT32_C(0xffffffff));
   uint32_t x612;
   uint32_t x613;
-  fiat_p256_mulx_u32(&x612, &x613, x588, UINT32_C(0xffffffff));
   uint32_t x614;
   fiat_p256_uint1 x615;
-  fiat_p256_addcarryx_u32(&x614, &x615, 0x0, x613, x610);
   uint32_t x616;
   fiat_p256_uint1 x617;
-  fiat_p256_addcarryx_u32(&x616, &x617, x615, x611, x608);
-  uint32_t x618 = (x617 + x609);
+  uint32_t x618;
   uint32_t x619;
   fiat_p256_uint1 x620;
-  fiat_p256_addcarryx_u32(&x619, &x620, 0x0, x588, x612);
   uint32_t x621;
   fiat_p256_uint1 x622;
-  fiat_p256_addcarryx_u32(&x621, &x622, x620, x590, x614);
   uint32_t x623;
   fiat_p256_uint1 x624;
-  fiat_p256_addcarryx_u32(&x623, &x624, x622, x592, x616);
   uint32_t x625;
   fiat_p256_uint1 x626;
-  fiat_p256_addcarryx_u32(&x625, &x626, x624, x594, x618);
   uint32_t x627;
   fiat_p256_uint1 x628;
-  fiat_p256_addcarryx_u32(&x627, &x628, x626, x596, 0x0);
   uint32_t x629;
   fiat_p256_uint1 x630;
-  fiat_p256_addcarryx_u32(&x629, &x630, x628, x598, 0x0);
   uint32_t x631;
   fiat_p256_uint1 x632;
-  fiat_p256_addcarryx_u32(&x631, &x632, x630, x600, x588);
   uint32_t x633;
   fiat_p256_uint1 x634;
-  fiat_p256_addcarryx_u32(&x633, &x634, x632, x602, x606);
   uint32_t x635;
   fiat_p256_uint1 x636;
-  fiat_p256_addcarryx_u32(&x635, &x636, x634, x604, x607);
-  uint32_t x637 = ((uint32_t)x636 + x605);
+  uint32_t x637;
   uint32_t x638;
   fiat_p256_uint1 x639;
-  fiat_p256_subborrowx_u32(&x638, &x639, 0x0, x621, UINT32_C(0xffffffff));
   uint32_t x640;
   fiat_p256_uint1 x641;
-  fiat_p256_subborrowx_u32(&x640, &x641, x639, x623, UINT32_C(0xffffffff));
   uint32_t x642;
   fiat_p256_uint1 x643;
-  fiat_p256_subborrowx_u32(&x642, &x643, x641, x625, UINT32_C(0xffffffff));
   uint32_t x644;
   fiat_p256_uint1 x645;
-  fiat_p256_subborrowx_u32(&x644, &x645, x643, x627, 0x0);
   uint32_t x646;
   fiat_p256_uint1 x647;
-  fiat_p256_subborrowx_u32(&x646, &x647, x645, x629, 0x0);
   uint32_t x648;
   fiat_p256_uint1 x649;
-  fiat_p256_subborrowx_u32(&x648, &x649, x647, x631, 0x0);
   uint32_t x650;
   fiat_p256_uint1 x651;
-  fiat_p256_subborrowx_u32(&x650, &x651, x649, x633, 0x1);
   uint32_t x652;
   fiat_p256_uint1 x653;
-  fiat_p256_subborrowx_u32(&x652, &x653, x651, x635, UINT32_C(0xffffffff));
   uint32_t x654;
   fiat_p256_uint1 x655;
-  fiat_p256_subborrowx_u32(&x654, &x655, x653, x637, 0x0);
   uint32_t x656;
-  fiat_p256_cmovznz_u32(&x656, x655, x638, x621);
   uint32_t x657;
-  fiat_p256_cmovznz_u32(&x657, x655, x640, x623);
   uint32_t x658;
-  fiat_p256_cmovznz_u32(&x658, x655, x642, x625);
   uint32_t x659;
-  fiat_p256_cmovznz_u32(&x659, x655, x644, x627);
   uint32_t x660;
-  fiat_p256_cmovznz_u32(&x660, x655, x646, x629);
   uint32_t x661;
-  fiat_p256_cmovznz_u32(&x661, x655, x648, x631);
   uint32_t x662;
-  fiat_p256_cmovznz_u32(&x662, x655, x650, x633);
   uint32_t x663;
+  x1 = (arg1[1]);
+  x2 = (arg1[2]);
+  x3 = (arg1[3]);
+  x4 = (arg1[4]);
+  x5 = (arg1[5]);
+  x6 = (arg1[6]);
+  x7 = (arg1[7]);
+  x8 = (arg1[0]);
+  fiat_p256_mulx_u32(&x9, &x10, x8, (arg2[7]));
+  fiat_p256_mulx_u32(&x11, &x12, x8, (arg2[6]));
+  fiat_p256_mulx_u32(&x13, &x14, x8, (arg2[5]));
+  fiat_p256_mulx_u32(&x15, &x16, x8, (arg2[4]));
+  fiat_p256_mulx_u32(&x17, &x18, x8, (arg2[3]));
+  fiat_p256_mulx_u32(&x19, &x20, x8, (arg2[2]));
+  fiat_p256_mulx_u32(&x21, &x22, x8, (arg2[1]));
+  fiat_p256_mulx_u32(&x23, &x24, x8, (arg2[0]));
+  fiat_p256_addcarryx_u32(&x25, &x26, 0x0, x24, x21);
+  fiat_p256_addcarryx_u32(&x27, &x28, x26, x22, x19);
+  fiat_p256_addcarryx_u32(&x29, &x30, x28, x20, x17);
+  fiat_p256_addcarryx_u32(&x31, &x32, x30, x18, x15);
+  fiat_p256_addcarryx_u32(&x33, &x34, x32, x16, x13);
+  fiat_p256_addcarryx_u32(&x35, &x36, x34, x14, x11);
+  fiat_p256_addcarryx_u32(&x37, &x38, x36, x12, x9);
+  x39 = (x38 + x10);
+  fiat_p256_mulx_u32(&x40, &x41, x23, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x42, &x43, x23, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x44, &x45, x23, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x46, &x47, x23, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x48, &x49, 0x0, x47, x44);
+  fiat_p256_addcarryx_u32(&x50, &x51, x49, x45, x42);
+  x52 = (x51 + x43);
+  fiat_p256_addcarryx_u32(&x53, &x54, 0x0, x23, x46);
+  fiat_p256_addcarryx_u32(&x55, &x56, x54, x25, x48);
+  fiat_p256_addcarryx_u32(&x57, &x58, x56, x27, x50);
+  fiat_p256_addcarryx_u32(&x59, &x60, x58, x29, x52);
+  fiat_p256_addcarryx_u32(&x61, &x62, x60, x31, 0x0);
+  fiat_p256_addcarryx_u32(&x63, &x64, x62, x33, 0x0);
+  fiat_p256_addcarryx_u32(&x65, &x66, x64, x35, x23);
+  fiat_p256_addcarryx_u32(&x67, &x68, x66, x37, x40);
+  fiat_p256_addcarryx_u32(&x69, &x70, x68, x39, x41);
+  fiat_p256_mulx_u32(&x71, &x72, x1, (arg2[7]));
+  fiat_p256_mulx_u32(&x73, &x74, x1, (arg2[6]));
+  fiat_p256_mulx_u32(&x75, &x76, x1, (arg2[5]));
+  fiat_p256_mulx_u32(&x77, &x78, x1, (arg2[4]));
+  fiat_p256_mulx_u32(&x79, &x80, x1, (arg2[3]));
+  fiat_p256_mulx_u32(&x81, &x82, x1, (arg2[2]));
+  fiat_p256_mulx_u32(&x83, &x84, x1, (arg2[1]));
+  fiat_p256_mulx_u32(&x85, &x86, x1, (arg2[0]));
+  fiat_p256_addcarryx_u32(&x87, &x88, 0x0, x86, x83);
+  fiat_p256_addcarryx_u32(&x89, &x90, x88, x84, x81);
+  fiat_p256_addcarryx_u32(&x91, &x92, x90, x82, x79);
+  fiat_p256_addcarryx_u32(&x93, &x94, x92, x80, x77);
+  fiat_p256_addcarryx_u32(&x95, &x96, x94, x78, x75);
+  fiat_p256_addcarryx_u32(&x97, &x98, x96, x76, x73);
+  fiat_p256_addcarryx_u32(&x99, &x100, x98, x74, x71);
+  x101 = (x100 + x72);
+  fiat_p256_addcarryx_u32(&x102, &x103, 0x0, x55, x85);
+  fiat_p256_addcarryx_u32(&x104, &x105, x103, x57, x87);
+  fiat_p256_addcarryx_u32(&x106, &x107, x105, x59, x89);
+  fiat_p256_addcarryx_u32(&x108, &x109, x107, x61, x91);
+  fiat_p256_addcarryx_u32(&x110, &x111, x109, x63, x93);
+  fiat_p256_addcarryx_u32(&x112, &x113, x111, x65, x95);
+  fiat_p256_addcarryx_u32(&x114, &x115, x113, x67, x97);
+  fiat_p256_addcarryx_u32(&x116, &x117, x115, x69, x99);
+  fiat_p256_addcarryx_u32(&x118, &x119, x117, x70, x101);
+  fiat_p256_mulx_u32(&x120, &x121, x102, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x122, &x123, x102, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x124, &x125, x102, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x126, &x127, x102, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x128, &x129, 0x0, x127, x124);
+  fiat_p256_addcarryx_u32(&x130, &x131, x129, x125, x122);
+  x132 = (x131 + x123);
+  fiat_p256_addcarryx_u32(&x133, &x134, 0x0, x102, x126);
+  fiat_p256_addcarryx_u32(&x135, &x136, x134, x104, x128);
+  fiat_p256_addcarryx_u32(&x137, &x138, x136, x106, x130);
+  fiat_p256_addcarryx_u32(&x139, &x140, x138, x108, x132);
+  fiat_p256_addcarryx_u32(&x141, &x142, x140, x110, 0x0);
+  fiat_p256_addcarryx_u32(&x143, &x144, x142, x112, 0x0);
+  fiat_p256_addcarryx_u32(&x145, &x146, x144, x114, x102);
+  fiat_p256_addcarryx_u32(&x147, &x148, x146, x116, x120);
+  fiat_p256_addcarryx_u32(&x149, &x150, x148, x118, x121);
+  x151 = ((uint32_t)x150 + x119);
+  fiat_p256_mulx_u32(&x152, &x153, x2, (arg2[7]));
+  fiat_p256_mulx_u32(&x154, &x155, x2, (arg2[6]));
+  fiat_p256_mulx_u32(&x156, &x157, x2, (arg2[5]));
+  fiat_p256_mulx_u32(&x158, &x159, x2, (arg2[4]));
+  fiat_p256_mulx_u32(&x160, &x161, x2, (arg2[3]));
+  fiat_p256_mulx_u32(&x162, &x163, x2, (arg2[2]));
+  fiat_p256_mulx_u32(&x164, &x165, x2, (arg2[1]));
+  fiat_p256_mulx_u32(&x166, &x167, x2, (arg2[0]));
+  fiat_p256_addcarryx_u32(&x168, &x169, 0x0, x167, x164);
+  fiat_p256_addcarryx_u32(&x170, &x171, x169, x165, x162);
+  fiat_p256_addcarryx_u32(&x172, &x173, x171, x163, x160);
+  fiat_p256_addcarryx_u32(&x174, &x175, x173, x161, x158);
+  fiat_p256_addcarryx_u32(&x176, &x177, x175, x159, x156);
+  fiat_p256_addcarryx_u32(&x178, &x179, x177, x157, x154);
+  fiat_p256_addcarryx_u32(&x180, &x181, x179, x155, x152);
+  x182 = (x181 + x153);
+  fiat_p256_addcarryx_u32(&x183, &x184, 0x0, x135, x166);
+  fiat_p256_addcarryx_u32(&x185, &x186, x184, x137, x168);
+  fiat_p256_addcarryx_u32(&x187, &x188, x186, x139, x170);
+  fiat_p256_addcarryx_u32(&x189, &x190, x188, x141, x172);
+  fiat_p256_addcarryx_u32(&x191, &x192, x190, x143, x174);
+  fiat_p256_addcarryx_u32(&x193, &x194, x192, x145, x176);
+  fiat_p256_addcarryx_u32(&x195, &x196, x194, x147, x178);
+  fiat_p256_addcarryx_u32(&x197, &x198, x196, x149, x180);
+  fiat_p256_addcarryx_u32(&x199, &x200, x198, x151, x182);
+  fiat_p256_mulx_u32(&x201, &x202, x183, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x203, &x204, x183, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x205, &x206, x183, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x207, &x208, x183, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x209, &x210, 0x0, x208, x205);
+  fiat_p256_addcarryx_u32(&x211, &x212, x210, x206, x203);
+  x213 = (x212 + x204);
+  fiat_p256_addcarryx_u32(&x214, &x215, 0x0, x183, x207);
+  fiat_p256_addcarryx_u32(&x216, &x217, x215, x185, x209);
+  fiat_p256_addcarryx_u32(&x218, &x219, x217, x187, x211);
+  fiat_p256_addcarryx_u32(&x220, &x221, x219, x189, x213);
+  fiat_p256_addcarryx_u32(&x222, &x223, x221, x191, 0x0);
+  fiat_p256_addcarryx_u32(&x224, &x225, x223, x193, 0x0);
+  fiat_p256_addcarryx_u32(&x226, &x227, x225, x195, x183);
+  fiat_p256_addcarryx_u32(&x228, &x229, x227, x197, x201);
+  fiat_p256_addcarryx_u32(&x230, &x231, x229, x199, x202);
+  x232 = ((uint32_t)x231 + x200);
+  fiat_p256_mulx_u32(&x233, &x234, x3, (arg2[7]));
+  fiat_p256_mulx_u32(&x235, &x236, x3, (arg2[6]));
+  fiat_p256_mulx_u32(&x237, &x238, x3, (arg2[5]));
+  fiat_p256_mulx_u32(&x239, &x240, x3, (arg2[4]));
+  fiat_p256_mulx_u32(&x241, &x242, x3, (arg2[3]));
+  fiat_p256_mulx_u32(&x243, &x244, x3, (arg2[2]));
+  fiat_p256_mulx_u32(&x245, &x246, x3, (arg2[1]));
+  fiat_p256_mulx_u32(&x247, &x248, x3, (arg2[0]));
+  fiat_p256_addcarryx_u32(&x249, &x250, 0x0, x248, x245);
+  fiat_p256_addcarryx_u32(&x251, &x252, x250, x246, x243);
+  fiat_p256_addcarryx_u32(&x253, &x254, x252, x244, x241);
+  fiat_p256_addcarryx_u32(&x255, &x256, x254, x242, x239);
+  fiat_p256_addcarryx_u32(&x257, &x258, x256, x240, x237);
+  fiat_p256_addcarryx_u32(&x259, &x260, x258, x238, x235);
+  fiat_p256_addcarryx_u32(&x261, &x262, x260, x236, x233);
+  x263 = (x262 + x234);
+  fiat_p256_addcarryx_u32(&x264, &x265, 0x0, x216, x247);
+  fiat_p256_addcarryx_u32(&x266, &x267, x265, x218, x249);
+  fiat_p256_addcarryx_u32(&x268, &x269, x267, x220, x251);
+  fiat_p256_addcarryx_u32(&x270, &x271, x269, x222, x253);
+  fiat_p256_addcarryx_u32(&x272, &x273, x271, x224, x255);
+  fiat_p256_addcarryx_u32(&x274, &x275, x273, x226, x257);
+  fiat_p256_addcarryx_u32(&x276, &x277, x275, x228, x259);
+  fiat_p256_addcarryx_u32(&x278, &x279, x277, x230, x261);
+  fiat_p256_addcarryx_u32(&x280, &x281, x279, x232, x263);
+  fiat_p256_mulx_u32(&x282, &x283, x264, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x284, &x285, x264, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x286, &x287, x264, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x288, &x289, x264, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x290, &x291, 0x0, x289, x286);
+  fiat_p256_addcarryx_u32(&x292, &x293, x291, x287, x284);
+  x294 = (x293 + x285);
+  fiat_p256_addcarryx_u32(&x295, &x296, 0x0, x264, x288);
+  fiat_p256_addcarryx_u32(&x297, &x298, x296, x266, x290);
+  fiat_p256_addcarryx_u32(&x299, &x300, x298, x268, x292);
+  fiat_p256_addcarryx_u32(&x301, &x302, x300, x270, x294);
+  fiat_p256_addcarryx_u32(&x303, &x304, x302, x272, 0x0);
+  fiat_p256_addcarryx_u32(&x305, &x306, x304, x274, 0x0);
+  fiat_p256_addcarryx_u32(&x307, &x308, x306, x276, x264);
+  fiat_p256_addcarryx_u32(&x309, &x310, x308, x278, x282);
+  fiat_p256_addcarryx_u32(&x311, &x312, x310, x280, x283);
+  x313 = ((uint32_t)x312 + x281);
+  fiat_p256_mulx_u32(&x314, &x315, x4, (arg2[7]));
+  fiat_p256_mulx_u32(&x316, &x317, x4, (arg2[6]));
+  fiat_p256_mulx_u32(&x318, &x319, x4, (arg2[5]));
+  fiat_p256_mulx_u32(&x320, &x321, x4, (arg2[4]));
+  fiat_p256_mulx_u32(&x322, &x323, x4, (arg2[3]));
+  fiat_p256_mulx_u32(&x324, &x325, x4, (arg2[2]));
+  fiat_p256_mulx_u32(&x326, &x327, x4, (arg2[1]));
+  fiat_p256_mulx_u32(&x328, &x329, x4, (arg2[0]));
+  fiat_p256_addcarryx_u32(&x330, &x331, 0x0, x329, x326);
+  fiat_p256_addcarryx_u32(&x332, &x333, x331, x327, x324);
+  fiat_p256_addcarryx_u32(&x334, &x335, x333, x325, x322);
+  fiat_p256_addcarryx_u32(&x336, &x337, x335, x323, x320);
+  fiat_p256_addcarryx_u32(&x338, &x339, x337, x321, x318);
+  fiat_p256_addcarryx_u32(&x340, &x341, x339, x319, x316);
+  fiat_p256_addcarryx_u32(&x342, &x343, x341, x317, x314);
+  x344 = (x343 + x315);
+  fiat_p256_addcarryx_u32(&x345, &x346, 0x0, x297, x328);
+  fiat_p256_addcarryx_u32(&x347, &x348, x346, x299, x330);
+  fiat_p256_addcarryx_u32(&x349, &x350, x348, x301, x332);
+  fiat_p256_addcarryx_u32(&x351, &x352, x350, x303, x334);
+  fiat_p256_addcarryx_u32(&x353, &x354, x352, x305, x336);
+  fiat_p256_addcarryx_u32(&x355, &x356, x354, x307, x338);
+  fiat_p256_addcarryx_u32(&x357, &x358, x356, x309, x340);
+  fiat_p256_addcarryx_u32(&x359, &x360, x358, x311, x342);
+  fiat_p256_addcarryx_u32(&x361, &x362, x360, x313, x344);
+  fiat_p256_mulx_u32(&x363, &x364, x345, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x365, &x366, x345, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x367, &x368, x345, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x369, &x370, x345, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x371, &x372, 0x0, x370, x367);
+  fiat_p256_addcarryx_u32(&x373, &x374, x372, x368, x365);
+  x375 = (x374 + x366);
+  fiat_p256_addcarryx_u32(&x376, &x377, 0x0, x345, x369);
+  fiat_p256_addcarryx_u32(&x378, &x379, x377, x347, x371);
+  fiat_p256_addcarryx_u32(&x380, &x381, x379, x349, x373);
+  fiat_p256_addcarryx_u32(&x382, &x383, x381, x351, x375);
+  fiat_p256_addcarryx_u32(&x384, &x385, x383, x353, 0x0);
+  fiat_p256_addcarryx_u32(&x386, &x387, x385, x355, 0x0);
+  fiat_p256_addcarryx_u32(&x388, &x389, x387, x357, x345);
+  fiat_p256_addcarryx_u32(&x390, &x391, x389, x359, x363);
+  fiat_p256_addcarryx_u32(&x392, &x393, x391, x361, x364);
+  x394 = ((uint32_t)x393 + x362);
+  fiat_p256_mulx_u32(&x395, &x396, x5, (arg2[7]));
+  fiat_p256_mulx_u32(&x397, &x398, x5, (arg2[6]));
+  fiat_p256_mulx_u32(&x399, &x400, x5, (arg2[5]));
+  fiat_p256_mulx_u32(&x401, &x402, x5, (arg2[4]));
+  fiat_p256_mulx_u32(&x403, &x404, x5, (arg2[3]));
+  fiat_p256_mulx_u32(&x405, &x406, x5, (arg2[2]));
+  fiat_p256_mulx_u32(&x407, &x408, x5, (arg2[1]));
+  fiat_p256_mulx_u32(&x409, &x410, x5, (arg2[0]));
+  fiat_p256_addcarryx_u32(&x411, &x412, 0x0, x410, x407);
+  fiat_p256_addcarryx_u32(&x413, &x414, x412, x408, x405);
+  fiat_p256_addcarryx_u32(&x415, &x416, x414, x406, x403);
+  fiat_p256_addcarryx_u32(&x417, &x418, x416, x404, x401);
+  fiat_p256_addcarryx_u32(&x419, &x420, x418, x402, x399);
+  fiat_p256_addcarryx_u32(&x421, &x422, x420, x400, x397);
+  fiat_p256_addcarryx_u32(&x423, &x424, x422, x398, x395);
+  x425 = (x424 + x396);
+  fiat_p256_addcarryx_u32(&x426, &x427, 0x0, x378, x409);
+  fiat_p256_addcarryx_u32(&x428, &x429, x427, x380, x411);
+  fiat_p256_addcarryx_u32(&x430, &x431, x429, x382, x413);
+  fiat_p256_addcarryx_u32(&x432, &x433, x431, x384, x415);
+  fiat_p256_addcarryx_u32(&x434, &x435, x433, x386, x417);
+  fiat_p256_addcarryx_u32(&x436, &x437, x435, x388, x419);
+  fiat_p256_addcarryx_u32(&x438, &x439, x437, x390, x421);
+  fiat_p256_addcarryx_u32(&x440, &x441, x439, x392, x423);
+  fiat_p256_addcarryx_u32(&x442, &x443, x441, x394, x425);
+  fiat_p256_mulx_u32(&x444, &x445, x426, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x446, &x447, x426, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x448, &x449, x426, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x450, &x451, x426, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x452, &x453, 0x0, x451, x448);
+  fiat_p256_addcarryx_u32(&x454, &x455, x453, x449, x446);
+  x456 = (x455 + x447);
+  fiat_p256_addcarryx_u32(&x457, &x458, 0x0, x426, x450);
+  fiat_p256_addcarryx_u32(&x459, &x460, x458, x428, x452);
+  fiat_p256_addcarryx_u32(&x461, &x462, x460, x430, x454);
+  fiat_p256_addcarryx_u32(&x463, &x464, x462, x432, x456);
+  fiat_p256_addcarryx_u32(&x465, &x466, x464, x434, 0x0);
+  fiat_p256_addcarryx_u32(&x467, &x468, x466, x436, 0x0);
+  fiat_p256_addcarryx_u32(&x469, &x470, x468, x438, x426);
+  fiat_p256_addcarryx_u32(&x471, &x472, x470, x440, x444);
+  fiat_p256_addcarryx_u32(&x473, &x474, x472, x442, x445);
+  x475 = ((uint32_t)x474 + x443);
+  fiat_p256_mulx_u32(&x476, &x477, x6, (arg2[7]));
+  fiat_p256_mulx_u32(&x478, &x479, x6, (arg2[6]));
+  fiat_p256_mulx_u32(&x480, &x481, x6, (arg2[5]));
+  fiat_p256_mulx_u32(&x482, &x483, x6, (arg2[4]));
+  fiat_p256_mulx_u32(&x484, &x485, x6, (arg2[3]));
+  fiat_p256_mulx_u32(&x486, &x487, x6, (arg2[2]));
+  fiat_p256_mulx_u32(&x488, &x489, x6, (arg2[1]));
+  fiat_p256_mulx_u32(&x490, &x491, x6, (arg2[0]));
+  fiat_p256_addcarryx_u32(&x492, &x493, 0x0, x491, x488);
+  fiat_p256_addcarryx_u32(&x494, &x495, x493, x489, x486);
+  fiat_p256_addcarryx_u32(&x496, &x497, x495, x487, x484);
+  fiat_p256_addcarryx_u32(&x498, &x499, x497, x485, x482);
+  fiat_p256_addcarryx_u32(&x500, &x501, x499, x483, x480);
+  fiat_p256_addcarryx_u32(&x502, &x503, x501, x481, x478);
+  fiat_p256_addcarryx_u32(&x504, &x505, x503, x479, x476);
+  x506 = (x505 + x477);
+  fiat_p256_addcarryx_u32(&x507, &x508, 0x0, x459, x490);
+  fiat_p256_addcarryx_u32(&x509, &x510, x508, x461, x492);
+  fiat_p256_addcarryx_u32(&x511, &x512, x510, x463, x494);
+  fiat_p256_addcarryx_u32(&x513, &x514, x512, x465, x496);
+  fiat_p256_addcarryx_u32(&x515, &x516, x514, x467, x498);
+  fiat_p256_addcarryx_u32(&x517, &x518, x516, x469, x500);
+  fiat_p256_addcarryx_u32(&x519, &x520, x518, x471, x502);
+  fiat_p256_addcarryx_u32(&x521, &x522, x520, x473, x504);
+  fiat_p256_addcarryx_u32(&x523, &x524, x522, x475, x506);
+  fiat_p256_mulx_u32(&x525, &x526, x507, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x527, &x528, x507, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x529, &x530, x507, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x531, &x532, x507, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x533, &x534, 0x0, x532, x529);
+  fiat_p256_addcarryx_u32(&x535, &x536, x534, x530, x527);
+  x537 = (x536 + x528);
+  fiat_p256_addcarryx_u32(&x538, &x539, 0x0, x507, x531);
+  fiat_p256_addcarryx_u32(&x540, &x541, x539, x509, x533);
+  fiat_p256_addcarryx_u32(&x542, &x543, x541, x511, x535);
+  fiat_p256_addcarryx_u32(&x544, &x545, x543, x513, x537);
+  fiat_p256_addcarryx_u32(&x546, &x547, x545, x515, 0x0);
+  fiat_p256_addcarryx_u32(&x548, &x549, x547, x517, 0x0);
+  fiat_p256_addcarryx_u32(&x550, &x551, x549, x519, x507);
+  fiat_p256_addcarryx_u32(&x552, &x553, x551, x521, x525);
+  fiat_p256_addcarryx_u32(&x554, &x555, x553, x523, x526);
+  x556 = ((uint32_t)x555 + x524);
+  fiat_p256_mulx_u32(&x557, &x558, x7, (arg2[7]));
+  fiat_p256_mulx_u32(&x559, &x560, x7, (arg2[6]));
+  fiat_p256_mulx_u32(&x561, &x562, x7, (arg2[5]));
+  fiat_p256_mulx_u32(&x563, &x564, x7, (arg2[4]));
+  fiat_p256_mulx_u32(&x565, &x566, x7, (arg2[3]));
+  fiat_p256_mulx_u32(&x567, &x568, x7, (arg2[2]));
+  fiat_p256_mulx_u32(&x569, &x570, x7, (arg2[1]));
+  fiat_p256_mulx_u32(&x571, &x572, x7, (arg2[0]));
+  fiat_p256_addcarryx_u32(&x573, &x574, 0x0, x572, x569);
+  fiat_p256_addcarryx_u32(&x575, &x576, x574, x570, x567);
+  fiat_p256_addcarryx_u32(&x577, &x578, x576, x568, x565);
+  fiat_p256_addcarryx_u32(&x579, &x580, x578, x566, x563);
+  fiat_p256_addcarryx_u32(&x581, &x582, x580, x564, x561);
+  fiat_p256_addcarryx_u32(&x583, &x584, x582, x562, x559);
+  fiat_p256_addcarryx_u32(&x585, &x586, x584, x560, x557);
+  x587 = (x586 + x558);
+  fiat_p256_addcarryx_u32(&x588, &x589, 0x0, x540, x571);
+  fiat_p256_addcarryx_u32(&x590, &x591, x589, x542, x573);
+  fiat_p256_addcarryx_u32(&x592, &x593, x591, x544, x575);
+  fiat_p256_addcarryx_u32(&x594, &x595, x593, x546, x577);
+  fiat_p256_addcarryx_u32(&x596, &x597, x595, x548, x579);
+  fiat_p256_addcarryx_u32(&x598, &x599, x597, x550, x581);
+  fiat_p256_addcarryx_u32(&x600, &x601, x599, x552, x583);
+  fiat_p256_addcarryx_u32(&x602, &x603, x601, x554, x585);
+  fiat_p256_addcarryx_u32(&x604, &x605, x603, x556, x587);
+  fiat_p256_mulx_u32(&x606, &x607, x588, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x608, &x609, x588, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x610, &x611, x588, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x612, &x613, x588, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x614, &x615, 0x0, x613, x610);
+  fiat_p256_addcarryx_u32(&x616, &x617, x615, x611, x608);
+  x618 = (x617 + x609);
+  fiat_p256_addcarryx_u32(&x619, &x620, 0x0, x588, x612);
+  fiat_p256_addcarryx_u32(&x621, &x622, x620, x590, x614);
+  fiat_p256_addcarryx_u32(&x623, &x624, x622, x592, x616);
+  fiat_p256_addcarryx_u32(&x625, &x626, x624, x594, x618);
+  fiat_p256_addcarryx_u32(&x627, &x628, x626, x596, 0x0);
+  fiat_p256_addcarryx_u32(&x629, &x630, x628, x598, 0x0);
+  fiat_p256_addcarryx_u32(&x631, &x632, x630, x600, x588);
+  fiat_p256_addcarryx_u32(&x633, &x634, x632, x602, x606);
+  fiat_p256_addcarryx_u32(&x635, &x636, x634, x604, x607);
+  x637 = ((uint32_t)x636 + x605);
+  fiat_p256_subborrowx_u32(&x638, &x639, 0x0, x621, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x640, &x641, x639, x623, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x642, &x643, x641, x625, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x644, &x645, x643, x627, 0x0);
+  fiat_p256_subborrowx_u32(&x646, &x647, x645, x629, 0x0);
+  fiat_p256_subborrowx_u32(&x648, &x649, x647, x631, 0x0);
+  fiat_p256_subborrowx_u32(&x650, &x651, x649, x633, 0x1);
+  fiat_p256_subborrowx_u32(&x652, &x653, x651, x635, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x654, &x655, x653, x637, 0x0);
+  fiat_p256_cmovznz_u32(&x656, x655, x638, x621);
+  fiat_p256_cmovznz_u32(&x657, x655, x640, x623);
+  fiat_p256_cmovznz_u32(&x658, x655, x642, x625);
+  fiat_p256_cmovznz_u32(&x659, x655, x644, x627);
+  fiat_p256_cmovznz_u32(&x660, x655, x646, x629);
+  fiat_p256_cmovznz_u32(&x661, x655, x648, x631);
+  fiat_p256_cmovznz_u32(&x662, x655, x650, x633);
   fiat_p256_cmovznz_u32(&x663, x655, x652, x635);
   out1[0] = x656;
   out1[1] = x657;
@@ -1121,1000 +1186,1028 @@
 
 /*
  * The function fiat_p256_square squares a field element in the Montgomery domain.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  * Postconditions:
  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
  *   0 ≤ eval out1 < m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
  */
-static void fiat_p256_square(uint32_t out1[8], const uint32_t arg1[8]) {
-  uint32_t x1 = (arg1[1]);
-  uint32_t x2 = (arg1[2]);
-  uint32_t x3 = (arg1[3]);
-  uint32_t x4 = (arg1[4]);
-  uint32_t x5 = (arg1[5]);
-  uint32_t x6 = (arg1[6]);
-  uint32_t x7 = (arg1[7]);
-  uint32_t x8 = (arg1[0]);
+static FIAT_P256_FIAT_INLINE void fiat_p256_square(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) {
+  uint32_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  uint32_t x4;
+  uint32_t x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint32_t x8;
   uint32_t x9;
   uint32_t x10;
-  fiat_p256_mulx_u32(&x9, &x10, x8, (arg1[7]));
   uint32_t x11;
   uint32_t x12;
-  fiat_p256_mulx_u32(&x11, &x12, x8, (arg1[6]));
   uint32_t x13;
   uint32_t x14;
-  fiat_p256_mulx_u32(&x13, &x14, x8, (arg1[5]));
   uint32_t x15;
   uint32_t x16;
-  fiat_p256_mulx_u32(&x15, &x16, x8, (arg1[4]));
   uint32_t x17;
   uint32_t x18;
-  fiat_p256_mulx_u32(&x17, &x18, x8, (arg1[3]));
   uint32_t x19;
   uint32_t x20;
-  fiat_p256_mulx_u32(&x19, &x20, x8, (arg1[2]));
   uint32_t x21;
   uint32_t x22;
-  fiat_p256_mulx_u32(&x21, &x22, x8, (arg1[1]));
   uint32_t x23;
   uint32_t x24;
-  fiat_p256_mulx_u32(&x23, &x24, x8, (arg1[0]));
   uint32_t x25;
   fiat_p256_uint1 x26;
-  fiat_p256_addcarryx_u32(&x25, &x26, 0x0, x24, x21);
   uint32_t x27;
   fiat_p256_uint1 x28;
-  fiat_p256_addcarryx_u32(&x27, &x28, x26, x22, x19);
   uint32_t x29;
   fiat_p256_uint1 x30;
-  fiat_p256_addcarryx_u32(&x29, &x30, x28, x20, x17);
   uint32_t x31;
   fiat_p256_uint1 x32;
-  fiat_p256_addcarryx_u32(&x31, &x32, x30, x18, x15);
   uint32_t x33;
   fiat_p256_uint1 x34;
-  fiat_p256_addcarryx_u32(&x33, &x34, x32, x16, x13);
   uint32_t x35;
   fiat_p256_uint1 x36;
-  fiat_p256_addcarryx_u32(&x35, &x36, x34, x14, x11);
   uint32_t x37;
   fiat_p256_uint1 x38;
-  fiat_p256_addcarryx_u32(&x37, &x38, x36, x12, x9);
-  uint32_t x39 = (x38 + x10);
+  uint32_t x39;
   uint32_t x40;
   uint32_t x41;
-  fiat_p256_mulx_u32(&x40, &x41, x23, UINT32_C(0xffffffff));
   uint32_t x42;
   uint32_t x43;
-  fiat_p256_mulx_u32(&x42, &x43, x23, UINT32_C(0xffffffff));
   uint32_t x44;
   uint32_t x45;
-  fiat_p256_mulx_u32(&x44, &x45, x23, UINT32_C(0xffffffff));
   uint32_t x46;
   uint32_t x47;
-  fiat_p256_mulx_u32(&x46, &x47, x23, UINT32_C(0xffffffff));
   uint32_t x48;
   fiat_p256_uint1 x49;
-  fiat_p256_addcarryx_u32(&x48, &x49, 0x0, x47, x44);
   uint32_t x50;
   fiat_p256_uint1 x51;
-  fiat_p256_addcarryx_u32(&x50, &x51, x49, x45, x42);
-  uint32_t x52 = (x51 + x43);
+  uint32_t x52;
   uint32_t x53;
   fiat_p256_uint1 x54;
-  fiat_p256_addcarryx_u32(&x53, &x54, 0x0, x23, x46);
   uint32_t x55;
   fiat_p256_uint1 x56;
-  fiat_p256_addcarryx_u32(&x55, &x56, x54, x25, x48);
   uint32_t x57;
   fiat_p256_uint1 x58;
-  fiat_p256_addcarryx_u32(&x57, &x58, x56, x27, x50);
   uint32_t x59;
   fiat_p256_uint1 x60;
-  fiat_p256_addcarryx_u32(&x59, &x60, x58, x29, x52);
   uint32_t x61;
   fiat_p256_uint1 x62;
-  fiat_p256_addcarryx_u32(&x61, &x62, x60, x31, 0x0);
   uint32_t x63;
   fiat_p256_uint1 x64;
-  fiat_p256_addcarryx_u32(&x63, &x64, x62, x33, 0x0);
   uint32_t x65;
   fiat_p256_uint1 x66;
-  fiat_p256_addcarryx_u32(&x65, &x66, x64, x35, x23);
   uint32_t x67;
   fiat_p256_uint1 x68;
-  fiat_p256_addcarryx_u32(&x67, &x68, x66, x37, x40);
   uint32_t x69;
   fiat_p256_uint1 x70;
-  fiat_p256_addcarryx_u32(&x69, &x70, x68, x39, x41);
   uint32_t x71;
   uint32_t x72;
-  fiat_p256_mulx_u32(&x71, &x72, x1, (arg1[7]));
   uint32_t x73;
   uint32_t x74;
-  fiat_p256_mulx_u32(&x73, &x74, x1, (arg1[6]));
   uint32_t x75;
   uint32_t x76;
-  fiat_p256_mulx_u32(&x75, &x76, x1, (arg1[5]));
   uint32_t x77;
   uint32_t x78;
-  fiat_p256_mulx_u32(&x77, &x78, x1, (arg1[4]));
   uint32_t x79;
   uint32_t x80;
-  fiat_p256_mulx_u32(&x79, &x80, x1, (arg1[3]));
   uint32_t x81;
   uint32_t x82;
-  fiat_p256_mulx_u32(&x81, &x82, x1, (arg1[2]));
   uint32_t x83;
   uint32_t x84;
-  fiat_p256_mulx_u32(&x83, &x84, x1, (arg1[1]));
   uint32_t x85;
   uint32_t x86;
-  fiat_p256_mulx_u32(&x85, &x86, x1, (arg1[0]));
   uint32_t x87;
   fiat_p256_uint1 x88;
-  fiat_p256_addcarryx_u32(&x87, &x88, 0x0, x86, x83);
   uint32_t x89;
   fiat_p256_uint1 x90;
-  fiat_p256_addcarryx_u32(&x89, &x90, x88, x84, x81);
   uint32_t x91;
   fiat_p256_uint1 x92;
-  fiat_p256_addcarryx_u32(&x91, &x92, x90, x82, x79);
   uint32_t x93;
   fiat_p256_uint1 x94;
-  fiat_p256_addcarryx_u32(&x93, &x94, x92, x80, x77);
   uint32_t x95;
   fiat_p256_uint1 x96;
-  fiat_p256_addcarryx_u32(&x95, &x96, x94, x78, x75);
   uint32_t x97;
   fiat_p256_uint1 x98;
-  fiat_p256_addcarryx_u32(&x97, &x98, x96, x76, x73);
   uint32_t x99;
   fiat_p256_uint1 x100;
-  fiat_p256_addcarryx_u32(&x99, &x100, x98, x74, x71);
-  uint32_t x101 = (x100 + x72);
+  uint32_t x101;
   uint32_t x102;
   fiat_p256_uint1 x103;
-  fiat_p256_addcarryx_u32(&x102, &x103, 0x0, x55, x85);
   uint32_t x104;
   fiat_p256_uint1 x105;
-  fiat_p256_addcarryx_u32(&x104, &x105, x103, x57, x87);
   uint32_t x106;
   fiat_p256_uint1 x107;
-  fiat_p256_addcarryx_u32(&x106, &x107, x105, x59, x89);
   uint32_t x108;
   fiat_p256_uint1 x109;
-  fiat_p256_addcarryx_u32(&x108, &x109, x107, x61, x91);
   uint32_t x110;
   fiat_p256_uint1 x111;
-  fiat_p256_addcarryx_u32(&x110, &x111, x109, x63, x93);
   uint32_t x112;
   fiat_p256_uint1 x113;
-  fiat_p256_addcarryx_u32(&x112, &x113, x111, x65, x95);
   uint32_t x114;
   fiat_p256_uint1 x115;
-  fiat_p256_addcarryx_u32(&x114, &x115, x113, x67, x97);
   uint32_t x116;
   fiat_p256_uint1 x117;
-  fiat_p256_addcarryx_u32(&x116, &x117, x115, x69, x99);
   uint32_t x118;
   fiat_p256_uint1 x119;
-  fiat_p256_addcarryx_u32(&x118, &x119, x117, x70, x101);
   uint32_t x120;
   uint32_t x121;
-  fiat_p256_mulx_u32(&x120, &x121, x102, UINT32_C(0xffffffff));
   uint32_t x122;
   uint32_t x123;
-  fiat_p256_mulx_u32(&x122, &x123, x102, UINT32_C(0xffffffff));
   uint32_t x124;
   uint32_t x125;
-  fiat_p256_mulx_u32(&x124, &x125, x102, UINT32_C(0xffffffff));
   uint32_t x126;
   uint32_t x127;
-  fiat_p256_mulx_u32(&x126, &x127, x102, UINT32_C(0xffffffff));
   uint32_t x128;
   fiat_p256_uint1 x129;
-  fiat_p256_addcarryx_u32(&x128, &x129, 0x0, x127, x124);
   uint32_t x130;
   fiat_p256_uint1 x131;
-  fiat_p256_addcarryx_u32(&x130, &x131, x129, x125, x122);
-  uint32_t x132 = (x131 + x123);
+  uint32_t x132;
   uint32_t x133;
   fiat_p256_uint1 x134;
-  fiat_p256_addcarryx_u32(&x133, &x134, 0x0, x102, x126);
   uint32_t x135;
   fiat_p256_uint1 x136;
-  fiat_p256_addcarryx_u32(&x135, &x136, x134, x104, x128);
   uint32_t x137;
   fiat_p256_uint1 x138;
-  fiat_p256_addcarryx_u32(&x137, &x138, x136, x106, x130);
   uint32_t x139;
   fiat_p256_uint1 x140;
-  fiat_p256_addcarryx_u32(&x139, &x140, x138, x108, x132);
   uint32_t x141;
   fiat_p256_uint1 x142;
-  fiat_p256_addcarryx_u32(&x141, &x142, x140, x110, 0x0);
   uint32_t x143;
   fiat_p256_uint1 x144;
-  fiat_p256_addcarryx_u32(&x143, &x144, x142, x112, 0x0);
   uint32_t x145;
   fiat_p256_uint1 x146;
-  fiat_p256_addcarryx_u32(&x145, &x146, x144, x114, x102);
   uint32_t x147;
   fiat_p256_uint1 x148;
-  fiat_p256_addcarryx_u32(&x147, &x148, x146, x116, x120);
   uint32_t x149;
   fiat_p256_uint1 x150;
-  fiat_p256_addcarryx_u32(&x149, &x150, x148, x118, x121);
-  uint32_t x151 = ((uint32_t)x150 + x119);
+  uint32_t x151;
   uint32_t x152;
   uint32_t x153;
-  fiat_p256_mulx_u32(&x152, &x153, x2, (arg1[7]));
   uint32_t x154;
   uint32_t x155;
-  fiat_p256_mulx_u32(&x154, &x155, x2, (arg1[6]));
   uint32_t x156;
   uint32_t x157;
-  fiat_p256_mulx_u32(&x156, &x157, x2, (arg1[5]));
   uint32_t x158;
   uint32_t x159;
-  fiat_p256_mulx_u32(&x158, &x159, x2, (arg1[4]));
   uint32_t x160;
   uint32_t x161;
-  fiat_p256_mulx_u32(&x160, &x161, x2, (arg1[3]));
   uint32_t x162;
   uint32_t x163;
-  fiat_p256_mulx_u32(&x162, &x163, x2, (arg1[2]));
   uint32_t x164;
   uint32_t x165;
-  fiat_p256_mulx_u32(&x164, &x165, x2, (arg1[1]));
   uint32_t x166;
   uint32_t x167;
-  fiat_p256_mulx_u32(&x166, &x167, x2, (arg1[0]));
   uint32_t x168;
   fiat_p256_uint1 x169;
-  fiat_p256_addcarryx_u32(&x168, &x169, 0x0, x167, x164);
   uint32_t x170;
   fiat_p256_uint1 x171;
-  fiat_p256_addcarryx_u32(&x170, &x171, x169, x165, x162);
   uint32_t x172;
   fiat_p256_uint1 x173;
-  fiat_p256_addcarryx_u32(&x172, &x173, x171, x163, x160);
   uint32_t x174;
   fiat_p256_uint1 x175;
-  fiat_p256_addcarryx_u32(&x174, &x175, x173, x161, x158);
   uint32_t x176;
   fiat_p256_uint1 x177;
-  fiat_p256_addcarryx_u32(&x176, &x177, x175, x159, x156);
   uint32_t x178;
   fiat_p256_uint1 x179;
-  fiat_p256_addcarryx_u32(&x178, &x179, x177, x157, x154);
   uint32_t x180;
   fiat_p256_uint1 x181;
-  fiat_p256_addcarryx_u32(&x180, &x181, x179, x155, x152);
-  uint32_t x182 = (x181 + x153);
+  uint32_t x182;
   uint32_t x183;
   fiat_p256_uint1 x184;
-  fiat_p256_addcarryx_u32(&x183, &x184, 0x0, x135, x166);
   uint32_t x185;
   fiat_p256_uint1 x186;
-  fiat_p256_addcarryx_u32(&x185, &x186, x184, x137, x168);
   uint32_t x187;
   fiat_p256_uint1 x188;
-  fiat_p256_addcarryx_u32(&x187, &x188, x186, x139, x170);
   uint32_t x189;
   fiat_p256_uint1 x190;
-  fiat_p256_addcarryx_u32(&x189, &x190, x188, x141, x172);
   uint32_t x191;
   fiat_p256_uint1 x192;
-  fiat_p256_addcarryx_u32(&x191, &x192, x190, x143, x174);
   uint32_t x193;
   fiat_p256_uint1 x194;
-  fiat_p256_addcarryx_u32(&x193, &x194, x192, x145, x176);
   uint32_t x195;
   fiat_p256_uint1 x196;
-  fiat_p256_addcarryx_u32(&x195, &x196, x194, x147, x178);
   uint32_t x197;
   fiat_p256_uint1 x198;
-  fiat_p256_addcarryx_u32(&x197, &x198, x196, x149, x180);
   uint32_t x199;
   fiat_p256_uint1 x200;
-  fiat_p256_addcarryx_u32(&x199, &x200, x198, x151, x182);
   uint32_t x201;
   uint32_t x202;
-  fiat_p256_mulx_u32(&x201, &x202, x183, UINT32_C(0xffffffff));
   uint32_t x203;
   uint32_t x204;
-  fiat_p256_mulx_u32(&x203, &x204, x183, UINT32_C(0xffffffff));
   uint32_t x205;
   uint32_t x206;
-  fiat_p256_mulx_u32(&x205, &x206, x183, UINT32_C(0xffffffff));
   uint32_t x207;
   uint32_t x208;
-  fiat_p256_mulx_u32(&x207, &x208, x183, UINT32_C(0xffffffff));
   uint32_t x209;
   fiat_p256_uint1 x210;
-  fiat_p256_addcarryx_u32(&x209, &x210, 0x0, x208, x205);
   uint32_t x211;
   fiat_p256_uint1 x212;
-  fiat_p256_addcarryx_u32(&x211, &x212, x210, x206, x203);
-  uint32_t x213 = (x212 + x204);
+  uint32_t x213;
   uint32_t x214;
   fiat_p256_uint1 x215;
-  fiat_p256_addcarryx_u32(&x214, &x215, 0x0, x183, x207);
   uint32_t x216;
   fiat_p256_uint1 x217;
-  fiat_p256_addcarryx_u32(&x216, &x217, x215, x185, x209);
   uint32_t x218;
   fiat_p256_uint1 x219;
-  fiat_p256_addcarryx_u32(&x218, &x219, x217, x187, x211);
   uint32_t x220;
   fiat_p256_uint1 x221;
-  fiat_p256_addcarryx_u32(&x220, &x221, x219, x189, x213);
   uint32_t x222;
   fiat_p256_uint1 x223;
-  fiat_p256_addcarryx_u32(&x222, &x223, x221, x191, 0x0);
   uint32_t x224;
   fiat_p256_uint1 x225;
-  fiat_p256_addcarryx_u32(&x224, &x225, x223, x193, 0x0);
   uint32_t x226;
   fiat_p256_uint1 x227;
-  fiat_p256_addcarryx_u32(&x226, &x227, x225, x195, x183);
   uint32_t x228;
   fiat_p256_uint1 x229;
-  fiat_p256_addcarryx_u32(&x228, &x229, x227, x197, x201);
   uint32_t x230;
   fiat_p256_uint1 x231;
-  fiat_p256_addcarryx_u32(&x230, &x231, x229, x199, x202);
-  uint32_t x232 = ((uint32_t)x231 + x200);
+  uint32_t x232;
   uint32_t x233;
   uint32_t x234;
-  fiat_p256_mulx_u32(&x233, &x234, x3, (arg1[7]));
   uint32_t x235;
   uint32_t x236;
-  fiat_p256_mulx_u32(&x235, &x236, x3, (arg1[6]));
   uint32_t x237;
   uint32_t x238;
-  fiat_p256_mulx_u32(&x237, &x238, x3, (arg1[5]));
   uint32_t x239;
   uint32_t x240;
-  fiat_p256_mulx_u32(&x239, &x240, x3, (arg1[4]));
   uint32_t x241;
   uint32_t x242;
-  fiat_p256_mulx_u32(&x241, &x242, x3, (arg1[3]));
   uint32_t x243;
   uint32_t x244;
-  fiat_p256_mulx_u32(&x243, &x244, x3, (arg1[2]));
   uint32_t x245;
   uint32_t x246;
-  fiat_p256_mulx_u32(&x245, &x246, x3, (arg1[1]));
   uint32_t x247;
   uint32_t x248;
-  fiat_p256_mulx_u32(&x247, &x248, x3, (arg1[0]));
   uint32_t x249;
   fiat_p256_uint1 x250;
-  fiat_p256_addcarryx_u32(&x249, &x250, 0x0, x248, x245);
   uint32_t x251;
   fiat_p256_uint1 x252;
-  fiat_p256_addcarryx_u32(&x251, &x252, x250, x246, x243);
   uint32_t x253;
   fiat_p256_uint1 x254;
-  fiat_p256_addcarryx_u32(&x253, &x254, x252, x244, x241);
   uint32_t x255;
   fiat_p256_uint1 x256;
-  fiat_p256_addcarryx_u32(&x255, &x256, x254, x242, x239);
   uint32_t x257;
   fiat_p256_uint1 x258;
-  fiat_p256_addcarryx_u32(&x257, &x258, x256, x240, x237);
   uint32_t x259;
   fiat_p256_uint1 x260;
-  fiat_p256_addcarryx_u32(&x259, &x260, x258, x238, x235);
   uint32_t x261;
   fiat_p256_uint1 x262;
-  fiat_p256_addcarryx_u32(&x261, &x262, x260, x236, x233);
-  uint32_t x263 = (x262 + x234);
+  uint32_t x263;
   uint32_t x264;
   fiat_p256_uint1 x265;
-  fiat_p256_addcarryx_u32(&x264, &x265, 0x0, x216, x247);
   uint32_t x266;
   fiat_p256_uint1 x267;
-  fiat_p256_addcarryx_u32(&x266, &x267, x265, x218, x249);
   uint32_t x268;
   fiat_p256_uint1 x269;
-  fiat_p256_addcarryx_u32(&x268, &x269, x267, x220, x251);
   uint32_t x270;
   fiat_p256_uint1 x271;
-  fiat_p256_addcarryx_u32(&x270, &x271, x269, x222, x253);
   uint32_t x272;
   fiat_p256_uint1 x273;
-  fiat_p256_addcarryx_u32(&x272, &x273, x271, x224, x255);
   uint32_t x274;
   fiat_p256_uint1 x275;
-  fiat_p256_addcarryx_u32(&x274, &x275, x273, x226, x257);
   uint32_t x276;
   fiat_p256_uint1 x277;
-  fiat_p256_addcarryx_u32(&x276, &x277, x275, x228, x259);
   uint32_t x278;
   fiat_p256_uint1 x279;
-  fiat_p256_addcarryx_u32(&x278, &x279, x277, x230, x261);
   uint32_t x280;
   fiat_p256_uint1 x281;
-  fiat_p256_addcarryx_u32(&x280, &x281, x279, x232, x263);
   uint32_t x282;
   uint32_t x283;
-  fiat_p256_mulx_u32(&x282, &x283, x264, UINT32_C(0xffffffff));
   uint32_t x284;
   uint32_t x285;
-  fiat_p256_mulx_u32(&x284, &x285, x264, UINT32_C(0xffffffff));
   uint32_t x286;
   uint32_t x287;
-  fiat_p256_mulx_u32(&x286, &x287, x264, UINT32_C(0xffffffff));
   uint32_t x288;
   uint32_t x289;
-  fiat_p256_mulx_u32(&x288, &x289, x264, UINT32_C(0xffffffff));
   uint32_t x290;
   fiat_p256_uint1 x291;
-  fiat_p256_addcarryx_u32(&x290, &x291, 0x0, x289, x286);
   uint32_t x292;
   fiat_p256_uint1 x293;
-  fiat_p256_addcarryx_u32(&x292, &x293, x291, x287, x284);
-  uint32_t x294 = (x293 + x285);
+  uint32_t x294;
   uint32_t x295;
   fiat_p256_uint1 x296;
-  fiat_p256_addcarryx_u32(&x295, &x296, 0x0, x264, x288);
   uint32_t x297;
   fiat_p256_uint1 x298;
-  fiat_p256_addcarryx_u32(&x297, &x298, x296, x266, x290);
   uint32_t x299;
   fiat_p256_uint1 x300;
-  fiat_p256_addcarryx_u32(&x299, &x300, x298, x268, x292);
   uint32_t x301;
   fiat_p256_uint1 x302;
-  fiat_p256_addcarryx_u32(&x301, &x302, x300, x270, x294);
   uint32_t x303;
   fiat_p256_uint1 x304;
-  fiat_p256_addcarryx_u32(&x303, &x304, x302, x272, 0x0);
   uint32_t x305;
   fiat_p256_uint1 x306;
-  fiat_p256_addcarryx_u32(&x305, &x306, x304, x274, 0x0);
   uint32_t x307;
   fiat_p256_uint1 x308;
-  fiat_p256_addcarryx_u32(&x307, &x308, x306, x276, x264);
   uint32_t x309;
   fiat_p256_uint1 x310;
-  fiat_p256_addcarryx_u32(&x309, &x310, x308, x278, x282);
   uint32_t x311;
   fiat_p256_uint1 x312;
-  fiat_p256_addcarryx_u32(&x311, &x312, x310, x280, x283);
-  uint32_t x313 = ((uint32_t)x312 + x281);
+  uint32_t x313;
   uint32_t x314;
   uint32_t x315;
-  fiat_p256_mulx_u32(&x314, &x315, x4, (arg1[7]));
   uint32_t x316;
   uint32_t x317;
-  fiat_p256_mulx_u32(&x316, &x317, x4, (arg1[6]));
   uint32_t x318;
   uint32_t x319;
-  fiat_p256_mulx_u32(&x318, &x319, x4, (arg1[5]));
   uint32_t x320;
   uint32_t x321;
-  fiat_p256_mulx_u32(&x320, &x321, x4, (arg1[4]));
   uint32_t x322;
   uint32_t x323;
-  fiat_p256_mulx_u32(&x322, &x323, x4, (arg1[3]));
   uint32_t x324;
   uint32_t x325;
-  fiat_p256_mulx_u32(&x324, &x325, x4, (arg1[2]));
   uint32_t x326;
   uint32_t x327;
-  fiat_p256_mulx_u32(&x326, &x327, x4, (arg1[1]));
   uint32_t x328;
   uint32_t x329;
-  fiat_p256_mulx_u32(&x328, &x329, x4, (arg1[0]));
   uint32_t x330;
   fiat_p256_uint1 x331;
-  fiat_p256_addcarryx_u32(&x330, &x331, 0x0, x329, x326);
   uint32_t x332;
   fiat_p256_uint1 x333;
-  fiat_p256_addcarryx_u32(&x332, &x333, x331, x327, x324);
   uint32_t x334;
   fiat_p256_uint1 x335;
-  fiat_p256_addcarryx_u32(&x334, &x335, x333, x325, x322);
   uint32_t x336;
   fiat_p256_uint1 x337;
-  fiat_p256_addcarryx_u32(&x336, &x337, x335, x323, x320);
   uint32_t x338;
   fiat_p256_uint1 x339;
-  fiat_p256_addcarryx_u32(&x338, &x339, x337, x321, x318);
   uint32_t x340;
   fiat_p256_uint1 x341;
-  fiat_p256_addcarryx_u32(&x340, &x341, x339, x319, x316);
   uint32_t x342;
   fiat_p256_uint1 x343;
-  fiat_p256_addcarryx_u32(&x342, &x343, x341, x317, x314);
-  uint32_t x344 = (x343 + x315);
+  uint32_t x344;
   uint32_t x345;
   fiat_p256_uint1 x346;
-  fiat_p256_addcarryx_u32(&x345, &x346, 0x0, x297, x328);
   uint32_t x347;
   fiat_p256_uint1 x348;
-  fiat_p256_addcarryx_u32(&x347, &x348, x346, x299, x330);
   uint32_t x349;
   fiat_p256_uint1 x350;
-  fiat_p256_addcarryx_u32(&x349, &x350, x348, x301, x332);
   uint32_t x351;
   fiat_p256_uint1 x352;
-  fiat_p256_addcarryx_u32(&x351, &x352, x350, x303, x334);
   uint32_t x353;
   fiat_p256_uint1 x354;
-  fiat_p256_addcarryx_u32(&x353, &x354, x352, x305, x336);
   uint32_t x355;
   fiat_p256_uint1 x356;
-  fiat_p256_addcarryx_u32(&x355, &x356, x354, x307, x338);
   uint32_t x357;
   fiat_p256_uint1 x358;
-  fiat_p256_addcarryx_u32(&x357, &x358, x356, x309, x340);
   uint32_t x359;
   fiat_p256_uint1 x360;
-  fiat_p256_addcarryx_u32(&x359, &x360, x358, x311, x342);
   uint32_t x361;
   fiat_p256_uint1 x362;
-  fiat_p256_addcarryx_u32(&x361, &x362, x360, x313, x344);
   uint32_t x363;
   uint32_t x364;
-  fiat_p256_mulx_u32(&x363, &x364, x345, UINT32_C(0xffffffff));
   uint32_t x365;
   uint32_t x366;
-  fiat_p256_mulx_u32(&x365, &x366, x345, UINT32_C(0xffffffff));
   uint32_t x367;
   uint32_t x368;
-  fiat_p256_mulx_u32(&x367, &x368, x345, UINT32_C(0xffffffff));
   uint32_t x369;
   uint32_t x370;
-  fiat_p256_mulx_u32(&x369, &x370, x345, UINT32_C(0xffffffff));
   uint32_t x371;
   fiat_p256_uint1 x372;
-  fiat_p256_addcarryx_u32(&x371, &x372, 0x0, x370, x367);
   uint32_t x373;
   fiat_p256_uint1 x374;
-  fiat_p256_addcarryx_u32(&x373, &x374, x372, x368, x365);
-  uint32_t x375 = (x374 + x366);
+  uint32_t x375;
   uint32_t x376;
   fiat_p256_uint1 x377;
-  fiat_p256_addcarryx_u32(&x376, &x377, 0x0, x345, x369);
   uint32_t x378;
   fiat_p256_uint1 x379;
-  fiat_p256_addcarryx_u32(&x378, &x379, x377, x347, x371);
   uint32_t x380;
   fiat_p256_uint1 x381;
-  fiat_p256_addcarryx_u32(&x380, &x381, x379, x349, x373);
   uint32_t x382;
   fiat_p256_uint1 x383;
-  fiat_p256_addcarryx_u32(&x382, &x383, x381, x351, x375);
   uint32_t x384;
   fiat_p256_uint1 x385;
-  fiat_p256_addcarryx_u32(&x384, &x385, x383, x353, 0x0);
   uint32_t x386;
   fiat_p256_uint1 x387;
-  fiat_p256_addcarryx_u32(&x386, &x387, x385, x355, 0x0);
   uint32_t x388;
   fiat_p256_uint1 x389;
-  fiat_p256_addcarryx_u32(&x388, &x389, x387, x357, x345);
   uint32_t x390;
   fiat_p256_uint1 x391;
-  fiat_p256_addcarryx_u32(&x390, &x391, x389, x359, x363);
   uint32_t x392;
   fiat_p256_uint1 x393;
-  fiat_p256_addcarryx_u32(&x392, &x393, x391, x361, x364);
-  uint32_t x394 = ((uint32_t)x393 + x362);
+  uint32_t x394;
   uint32_t x395;
   uint32_t x396;
-  fiat_p256_mulx_u32(&x395, &x396, x5, (arg1[7]));
   uint32_t x397;
   uint32_t x398;
-  fiat_p256_mulx_u32(&x397, &x398, x5, (arg1[6]));
   uint32_t x399;
   uint32_t x400;
-  fiat_p256_mulx_u32(&x399, &x400, x5, (arg1[5]));
   uint32_t x401;
   uint32_t x402;
-  fiat_p256_mulx_u32(&x401, &x402, x5, (arg1[4]));
   uint32_t x403;
   uint32_t x404;
-  fiat_p256_mulx_u32(&x403, &x404, x5, (arg1[3]));
   uint32_t x405;
   uint32_t x406;
-  fiat_p256_mulx_u32(&x405, &x406, x5, (arg1[2]));
   uint32_t x407;
   uint32_t x408;
-  fiat_p256_mulx_u32(&x407, &x408, x5, (arg1[1]));
   uint32_t x409;
   uint32_t x410;
-  fiat_p256_mulx_u32(&x409, &x410, x5, (arg1[0]));
   uint32_t x411;
   fiat_p256_uint1 x412;
-  fiat_p256_addcarryx_u32(&x411, &x412, 0x0, x410, x407);
   uint32_t x413;
   fiat_p256_uint1 x414;
-  fiat_p256_addcarryx_u32(&x413, &x414, x412, x408, x405);
   uint32_t x415;
   fiat_p256_uint1 x416;
-  fiat_p256_addcarryx_u32(&x415, &x416, x414, x406, x403);
   uint32_t x417;
   fiat_p256_uint1 x418;
-  fiat_p256_addcarryx_u32(&x417, &x418, x416, x404, x401);
   uint32_t x419;
   fiat_p256_uint1 x420;
-  fiat_p256_addcarryx_u32(&x419, &x420, x418, x402, x399);
   uint32_t x421;
   fiat_p256_uint1 x422;
-  fiat_p256_addcarryx_u32(&x421, &x422, x420, x400, x397);
   uint32_t x423;
   fiat_p256_uint1 x424;
-  fiat_p256_addcarryx_u32(&x423, &x424, x422, x398, x395);
-  uint32_t x425 = (x424 + x396);
+  uint32_t x425;
   uint32_t x426;
   fiat_p256_uint1 x427;
-  fiat_p256_addcarryx_u32(&x426, &x427, 0x0, x378, x409);
   uint32_t x428;
   fiat_p256_uint1 x429;
-  fiat_p256_addcarryx_u32(&x428, &x429, x427, x380, x411);
   uint32_t x430;
   fiat_p256_uint1 x431;
-  fiat_p256_addcarryx_u32(&x430, &x431, x429, x382, x413);
   uint32_t x432;
   fiat_p256_uint1 x433;
-  fiat_p256_addcarryx_u32(&x432, &x433, x431, x384, x415);
   uint32_t x434;
   fiat_p256_uint1 x435;
-  fiat_p256_addcarryx_u32(&x434, &x435, x433, x386, x417);
   uint32_t x436;
   fiat_p256_uint1 x437;
-  fiat_p256_addcarryx_u32(&x436, &x437, x435, x388, x419);
   uint32_t x438;
   fiat_p256_uint1 x439;
-  fiat_p256_addcarryx_u32(&x438, &x439, x437, x390, x421);
   uint32_t x440;
   fiat_p256_uint1 x441;
-  fiat_p256_addcarryx_u32(&x440, &x441, x439, x392, x423);
   uint32_t x442;
   fiat_p256_uint1 x443;
-  fiat_p256_addcarryx_u32(&x442, &x443, x441, x394, x425);
   uint32_t x444;
   uint32_t x445;
-  fiat_p256_mulx_u32(&x444, &x445, x426, UINT32_C(0xffffffff));
   uint32_t x446;
   uint32_t x447;
-  fiat_p256_mulx_u32(&x446, &x447, x426, UINT32_C(0xffffffff));
   uint32_t x448;
   uint32_t x449;
-  fiat_p256_mulx_u32(&x448, &x449, x426, UINT32_C(0xffffffff));
   uint32_t x450;
   uint32_t x451;
-  fiat_p256_mulx_u32(&x450, &x451, x426, UINT32_C(0xffffffff));
   uint32_t x452;
   fiat_p256_uint1 x453;
-  fiat_p256_addcarryx_u32(&x452, &x453, 0x0, x451, x448);
   uint32_t x454;
   fiat_p256_uint1 x455;
-  fiat_p256_addcarryx_u32(&x454, &x455, x453, x449, x446);
-  uint32_t x456 = (x455 + x447);
+  uint32_t x456;
   uint32_t x457;
   fiat_p256_uint1 x458;
-  fiat_p256_addcarryx_u32(&x457, &x458, 0x0, x426, x450);
   uint32_t x459;
   fiat_p256_uint1 x460;
-  fiat_p256_addcarryx_u32(&x459, &x460, x458, x428, x452);
   uint32_t x461;
   fiat_p256_uint1 x462;
-  fiat_p256_addcarryx_u32(&x461, &x462, x460, x430, x454);
   uint32_t x463;
   fiat_p256_uint1 x464;
-  fiat_p256_addcarryx_u32(&x463, &x464, x462, x432, x456);
   uint32_t x465;
   fiat_p256_uint1 x466;
-  fiat_p256_addcarryx_u32(&x465, &x466, x464, x434, 0x0);
   uint32_t x467;
   fiat_p256_uint1 x468;
-  fiat_p256_addcarryx_u32(&x467, &x468, x466, x436, 0x0);
   uint32_t x469;
   fiat_p256_uint1 x470;
-  fiat_p256_addcarryx_u32(&x469, &x470, x468, x438, x426);
   uint32_t x471;
   fiat_p256_uint1 x472;
-  fiat_p256_addcarryx_u32(&x471, &x472, x470, x440, x444);
   uint32_t x473;
   fiat_p256_uint1 x474;
-  fiat_p256_addcarryx_u32(&x473, &x474, x472, x442, x445);
-  uint32_t x475 = ((uint32_t)x474 + x443);
+  uint32_t x475;
   uint32_t x476;
   uint32_t x477;
-  fiat_p256_mulx_u32(&x476, &x477, x6, (arg1[7]));
   uint32_t x478;
   uint32_t x479;
-  fiat_p256_mulx_u32(&x478, &x479, x6, (arg1[6]));
   uint32_t x480;
   uint32_t x481;
-  fiat_p256_mulx_u32(&x480, &x481, x6, (arg1[5]));
   uint32_t x482;
   uint32_t x483;
-  fiat_p256_mulx_u32(&x482, &x483, x6, (arg1[4]));
   uint32_t x484;
   uint32_t x485;
-  fiat_p256_mulx_u32(&x484, &x485, x6, (arg1[3]));
   uint32_t x486;
   uint32_t x487;
-  fiat_p256_mulx_u32(&x486, &x487, x6, (arg1[2]));
   uint32_t x488;
   uint32_t x489;
-  fiat_p256_mulx_u32(&x488, &x489, x6, (arg1[1]));
   uint32_t x490;
   uint32_t x491;
-  fiat_p256_mulx_u32(&x490, &x491, x6, (arg1[0]));
   uint32_t x492;
   fiat_p256_uint1 x493;
-  fiat_p256_addcarryx_u32(&x492, &x493, 0x0, x491, x488);
   uint32_t x494;
   fiat_p256_uint1 x495;
-  fiat_p256_addcarryx_u32(&x494, &x495, x493, x489, x486);
   uint32_t x496;
   fiat_p256_uint1 x497;
-  fiat_p256_addcarryx_u32(&x496, &x497, x495, x487, x484);
   uint32_t x498;
   fiat_p256_uint1 x499;
-  fiat_p256_addcarryx_u32(&x498, &x499, x497, x485, x482);
   uint32_t x500;
   fiat_p256_uint1 x501;
-  fiat_p256_addcarryx_u32(&x500, &x501, x499, x483, x480);
   uint32_t x502;
   fiat_p256_uint1 x503;
-  fiat_p256_addcarryx_u32(&x502, &x503, x501, x481, x478);
   uint32_t x504;
   fiat_p256_uint1 x505;
-  fiat_p256_addcarryx_u32(&x504, &x505, x503, x479, x476);
-  uint32_t x506 = (x505 + x477);
+  uint32_t x506;
   uint32_t x507;
   fiat_p256_uint1 x508;
-  fiat_p256_addcarryx_u32(&x507, &x508, 0x0, x459, x490);
   uint32_t x509;
   fiat_p256_uint1 x510;
-  fiat_p256_addcarryx_u32(&x509, &x510, x508, x461, x492);
   uint32_t x511;
   fiat_p256_uint1 x512;
-  fiat_p256_addcarryx_u32(&x511, &x512, x510, x463, x494);
   uint32_t x513;
   fiat_p256_uint1 x514;
-  fiat_p256_addcarryx_u32(&x513, &x514, x512, x465, x496);
   uint32_t x515;
   fiat_p256_uint1 x516;
-  fiat_p256_addcarryx_u32(&x515, &x516, x514, x467, x498);
   uint32_t x517;
   fiat_p256_uint1 x518;
-  fiat_p256_addcarryx_u32(&x517, &x518, x516, x469, x500);
   uint32_t x519;
   fiat_p256_uint1 x520;
-  fiat_p256_addcarryx_u32(&x519, &x520, x518, x471, x502);
   uint32_t x521;
   fiat_p256_uint1 x522;
-  fiat_p256_addcarryx_u32(&x521, &x522, x520, x473, x504);
   uint32_t x523;
   fiat_p256_uint1 x524;
-  fiat_p256_addcarryx_u32(&x523, &x524, x522, x475, x506);
   uint32_t x525;
   uint32_t x526;
-  fiat_p256_mulx_u32(&x525, &x526, x507, UINT32_C(0xffffffff));
   uint32_t x527;
   uint32_t x528;
-  fiat_p256_mulx_u32(&x527, &x528, x507, UINT32_C(0xffffffff));
   uint32_t x529;
   uint32_t x530;
-  fiat_p256_mulx_u32(&x529, &x530, x507, UINT32_C(0xffffffff));
   uint32_t x531;
   uint32_t x532;
-  fiat_p256_mulx_u32(&x531, &x532, x507, UINT32_C(0xffffffff));
   uint32_t x533;
   fiat_p256_uint1 x534;
-  fiat_p256_addcarryx_u32(&x533, &x534, 0x0, x532, x529);
   uint32_t x535;
   fiat_p256_uint1 x536;
-  fiat_p256_addcarryx_u32(&x535, &x536, x534, x530, x527);
-  uint32_t x537 = (x536 + x528);
+  uint32_t x537;
   uint32_t x538;
   fiat_p256_uint1 x539;
-  fiat_p256_addcarryx_u32(&x538, &x539, 0x0, x507, x531);
   uint32_t x540;
   fiat_p256_uint1 x541;
-  fiat_p256_addcarryx_u32(&x540, &x541, x539, x509, x533);
   uint32_t x542;
   fiat_p256_uint1 x543;
-  fiat_p256_addcarryx_u32(&x542, &x543, x541, x511, x535);
   uint32_t x544;
   fiat_p256_uint1 x545;
-  fiat_p256_addcarryx_u32(&x544, &x545, x543, x513, x537);
   uint32_t x546;
   fiat_p256_uint1 x547;
-  fiat_p256_addcarryx_u32(&x546, &x547, x545, x515, 0x0);
   uint32_t x548;
   fiat_p256_uint1 x549;
-  fiat_p256_addcarryx_u32(&x548, &x549, x547, x517, 0x0);
   uint32_t x550;
   fiat_p256_uint1 x551;
-  fiat_p256_addcarryx_u32(&x550, &x551, x549, x519, x507);
   uint32_t x552;
   fiat_p256_uint1 x553;
-  fiat_p256_addcarryx_u32(&x552, &x553, x551, x521, x525);
   uint32_t x554;
   fiat_p256_uint1 x555;
-  fiat_p256_addcarryx_u32(&x554, &x555, x553, x523, x526);
-  uint32_t x556 = ((uint32_t)x555 + x524);
+  uint32_t x556;
   uint32_t x557;
   uint32_t x558;
-  fiat_p256_mulx_u32(&x557, &x558, x7, (arg1[7]));
   uint32_t x559;
   uint32_t x560;
-  fiat_p256_mulx_u32(&x559, &x560, x7, (arg1[6]));
   uint32_t x561;
   uint32_t x562;
-  fiat_p256_mulx_u32(&x561, &x562, x7, (arg1[5]));
   uint32_t x563;
   uint32_t x564;
-  fiat_p256_mulx_u32(&x563, &x564, x7, (arg1[4]));
   uint32_t x565;
   uint32_t x566;
-  fiat_p256_mulx_u32(&x565, &x566, x7, (arg1[3]));
   uint32_t x567;
   uint32_t x568;
-  fiat_p256_mulx_u32(&x567, &x568, x7, (arg1[2]));
   uint32_t x569;
   uint32_t x570;
-  fiat_p256_mulx_u32(&x569, &x570, x7, (arg1[1]));
   uint32_t x571;
   uint32_t x572;
-  fiat_p256_mulx_u32(&x571, &x572, x7, (arg1[0]));
   uint32_t x573;
   fiat_p256_uint1 x574;
-  fiat_p256_addcarryx_u32(&x573, &x574, 0x0, x572, x569);
   uint32_t x575;
   fiat_p256_uint1 x576;
-  fiat_p256_addcarryx_u32(&x575, &x576, x574, x570, x567);
   uint32_t x577;
   fiat_p256_uint1 x578;
-  fiat_p256_addcarryx_u32(&x577, &x578, x576, x568, x565);
   uint32_t x579;
   fiat_p256_uint1 x580;
-  fiat_p256_addcarryx_u32(&x579, &x580, x578, x566, x563);
   uint32_t x581;
   fiat_p256_uint1 x582;
-  fiat_p256_addcarryx_u32(&x581, &x582, x580, x564, x561);
   uint32_t x583;
   fiat_p256_uint1 x584;
-  fiat_p256_addcarryx_u32(&x583, &x584, x582, x562, x559);
   uint32_t x585;
   fiat_p256_uint1 x586;
-  fiat_p256_addcarryx_u32(&x585, &x586, x584, x560, x557);
-  uint32_t x587 = (x586 + x558);
+  uint32_t x587;
   uint32_t x588;
   fiat_p256_uint1 x589;
-  fiat_p256_addcarryx_u32(&x588, &x589, 0x0, x540, x571);
   uint32_t x590;
   fiat_p256_uint1 x591;
-  fiat_p256_addcarryx_u32(&x590, &x591, x589, x542, x573);
   uint32_t x592;
   fiat_p256_uint1 x593;
-  fiat_p256_addcarryx_u32(&x592, &x593, x591, x544, x575);
   uint32_t x594;
   fiat_p256_uint1 x595;
-  fiat_p256_addcarryx_u32(&x594, &x595, x593, x546, x577);
   uint32_t x596;
   fiat_p256_uint1 x597;
-  fiat_p256_addcarryx_u32(&x596, &x597, x595, x548, x579);
   uint32_t x598;
   fiat_p256_uint1 x599;
-  fiat_p256_addcarryx_u32(&x598, &x599, x597, x550, x581);
   uint32_t x600;
   fiat_p256_uint1 x601;
-  fiat_p256_addcarryx_u32(&x600, &x601, x599, x552, x583);
   uint32_t x602;
   fiat_p256_uint1 x603;
-  fiat_p256_addcarryx_u32(&x602, &x603, x601, x554, x585);
   uint32_t x604;
   fiat_p256_uint1 x605;
-  fiat_p256_addcarryx_u32(&x604, &x605, x603, x556, x587);
   uint32_t x606;
   uint32_t x607;
-  fiat_p256_mulx_u32(&x606, &x607, x588, UINT32_C(0xffffffff));
   uint32_t x608;
   uint32_t x609;
-  fiat_p256_mulx_u32(&x608, &x609, x588, UINT32_C(0xffffffff));
   uint32_t x610;
   uint32_t x611;
-  fiat_p256_mulx_u32(&x610, &x611, x588, UINT32_C(0xffffffff));
   uint32_t x612;
   uint32_t x613;
-  fiat_p256_mulx_u32(&x612, &x613, x588, UINT32_C(0xffffffff));
   uint32_t x614;
   fiat_p256_uint1 x615;
-  fiat_p256_addcarryx_u32(&x614, &x615, 0x0, x613, x610);
   uint32_t x616;
   fiat_p256_uint1 x617;
-  fiat_p256_addcarryx_u32(&x616, &x617, x615, x611, x608);
-  uint32_t x618 = (x617 + x609);
+  uint32_t x618;
   uint32_t x619;
   fiat_p256_uint1 x620;
-  fiat_p256_addcarryx_u32(&x619, &x620, 0x0, x588, x612);
   uint32_t x621;
   fiat_p256_uint1 x622;
-  fiat_p256_addcarryx_u32(&x621, &x622, x620, x590, x614);
   uint32_t x623;
   fiat_p256_uint1 x624;
-  fiat_p256_addcarryx_u32(&x623, &x624, x622, x592, x616);
   uint32_t x625;
   fiat_p256_uint1 x626;
-  fiat_p256_addcarryx_u32(&x625, &x626, x624, x594, x618);
   uint32_t x627;
   fiat_p256_uint1 x628;
-  fiat_p256_addcarryx_u32(&x627, &x628, x626, x596, 0x0);
   uint32_t x629;
   fiat_p256_uint1 x630;
-  fiat_p256_addcarryx_u32(&x629, &x630, x628, x598, 0x0);
   uint32_t x631;
   fiat_p256_uint1 x632;
-  fiat_p256_addcarryx_u32(&x631, &x632, x630, x600, x588);
   uint32_t x633;
   fiat_p256_uint1 x634;
-  fiat_p256_addcarryx_u32(&x633, &x634, x632, x602, x606);
   uint32_t x635;
   fiat_p256_uint1 x636;
-  fiat_p256_addcarryx_u32(&x635, &x636, x634, x604, x607);
-  uint32_t x637 = ((uint32_t)x636 + x605);
+  uint32_t x637;
   uint32_t x638;
   fiat_p256_uint1 x639;
-  fiat_p256_subborrowx_u32(&x638, &x639, 0x0, x621, UINT32_C(0xffffffff));
   uint32_t x640;
   fiat_p256_uint1 x641;
-  fiat_p256_subborrowx_u32(&x640, &x641, x639, x623, UINT32_C(0xffffffff));
   uint32_t x642;
   fiat_p256_uint1 x643;
-  fiat_p256_subborrowx_u32(&x642, &x643, x641, x625, UINT32_C(0xffffffff));
   uint32_t x644;
   fiat_p256_uint1 x645;
-  fiat_p256_subborrowx_u32(&x644, &x645, x643, x627, 0x0);
   uint32_t x646;
   fiat_p256_uint1 x647;
-  fiat_p256_subborrowx_u32(&x646, &x647, x645, x629, 0x0);
   uint32_t x648;
   fiat_p256_uint1 x649;
-  fiat_p256_subborrowx_u32(&x648, &x649, x647, x631, 0x0);
   uint32_t x650;
   fiat_p256_uint1 x651;
-  fiat_p256_subborrowx_u32(&x650, &x651, x649, x633, 0x1);
   uint32_t x652;
   fiat_p256_uint1 x653;
-  fiat_p256_subborrowx_u32(&x652, &x653, x651, x635, UINT32_C(0xffffffff));
   uint32_t x654;
   fiat_p256_uint1 x655;
-  fiat_p256_subborrowx_u32(&x654, &x655, x653, x637, 0x0);
   uint32_t x656;
-  fiat_p256_cmovznz_u32(&x656, x655, x638, x621);
   uint32_t x657;
-  fiat_p256_cmovznz_u32(&x657, x655, x640, x623);
   uint32_t x658;
-  fiat_p256_cmovznz_u32(&x658, x655, x642, x625);
   uint32_t x659;
-  fiat_p256_cmovznz_u32(&x659, x655, x644, x627);
   uint32_t x660;
-  fiat_p256_cmovznz_u32(&x660, x655, x646, x629);
   uint32_t x661;
-  fiat_p256_cmovznz_u32(&x661, x655, x648, x631);
   uint32_t x662;
-  fiat_p256_cmovznz_u32(&x662, x655, x650, x633);
   uint32_t x663;
+  x1 = (arg1[1]);
+  x2 = (arg1[2]);
+  x3 = (arg1[3]);
+  x4 = (arg1[4]);
+  x5 = (arg1[5]);
+  x6 = (arg1[6]);
+  x7 = (arg1[7]);
+  x8 = (arg1[0]);
+  fiat_p256_mulx_u32(&x9, &x10, x8, (arg1[7]));
+  fiat_p256_mulx_u32(&x11, &x12, x8, (arg1[6]));
+  fiat_p256_mulx_u32(&x13, &x14, x8, (arg1[5]));
+  fiat_p256_mulx_u32(&x15, &x16, x8, (arg1[4]));
+  fiat_p256_mulx_u32(&x17, &x18, x8, (arg1[3]));
+  fiat_p256_mulx_u32(&x19, &x20, x8, (arg1[2]));
+  fiat_p256_mulx_u32(&x21, &x22, x8, (arg1[1]));
+  fiat_p256_mulx_u32(&x23, &x24, x8, (arg1[0]));
+  fiat_p256_addcarryx_u32(&x25, &x26, 0x0, x24, x21);
+  fiat_p256_addcarryx_u32(&x27, &x28, x26, x22, x19);
+  fiat_p256_addcarryx_u32(&x29, &x30, x28, x20, x17);
+  fiat_p256_addcarryx_u32(&x31, &x32, x30, x18, x15);
+  fiat_p256_addcarryx_u32(&x33, &x34, x32, x16, x13);
+  fiat_p256_addcarryx_u32(&x35, &x36, x34, x14, x11);
+  fiat_p256_addcarryx_u32(&x37, &x38, x36, x12, x9);
+  x39 = (x38 + x10);
+  fiat_p256_mulx_u32(&x40, &x41, x23, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x42, &x43, x23, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x44, &x45, x23, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x46, &x47, x23, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x48, &x49, 0x0, x47, x44);
+  fiat_p256_addcarryx_u32(&x50, &x51, x49, x45, x42);
+  x52 = (x51 + x43);
+  fiat_p256_addcarryx_u32(&x53, &x54, 0x0, x23, x46);
+  fiat_p256_addcarryx_u32(&x55, &x56, x54, x25, x48);
+  fiat_p256_addcarryx_u32(&x57, &x58, x56, x27, x50);
+  fiat_p256_addcarryx_u32(&x59, &x60, x58, x29, x52);
+  fiat_p256_addcarryx_u32(&x61, &x62, x60, x31, 0x0);
+  fiat_p256_addcarryx_u32(&x63, &x64, x62, x33, 0x0);
+  fiat_p256_addcarryx_u32(&x65, &x66, x64, x35, x23);
+  fiat_p256_addcarryx_u32(&x67, &x68, x66, x37, x40);
+  fiat_p256_addcarryx_u32(&x69, &x70, x68, x39, x41);
+  fiat_p256_mulx_u32(&x71, &x72, x1, (arg1[7]));
+  fiat_p256_mulx_u32(&x73, &x74, x1, (arg1[6]));
+  fiat_p256_mulx_u32(&x75, &x76, x1, (arg1[5]));
+  fiat_p256_mulx_u32(&x77, &x78, x1, (arg1[4]));
+  fiat_p256_mulx_u32(&x79, &x80, x1, (arg1[3]));
+  fiat_p256_mulx_u32(&x81, &x82, x1, (arg1[2]));
+  fiat_p256_mulx_u32(&x83, &x84, x1, (arg1[1]));
+  fiat_p256_mulx_u32(&x85, &x86, x1, (arg1[0]));
+  fiat_p256_addcarryx_u32(&x87, &x88, 0x0, x86, x83);
+  fiat_p256_addcarryx_u32(&x89, &x90, x88, x84, x81);
+  fiat_p256_addcarryx_u32(&x91, &x92, x90, x82, x79);
+  fiat_p256_addcarryx_u32(&x93, &x94, x92, x80, x77);
+  fiat_p256_addcarryx_u32(&x95, &x96, x94, x78, x75);
+  fiat_p256_addcarryx_u32(&x97, &x98, x96, x76, x73);
+  fiat_p256_addcarryx_u32(&x99, &x100, x98, x74, x71);
+  x101 = (x100 + x72);
+  fiat_p256_addcarryx_u32(&x102, &x103, 0x0, x55, x85);
+  fiat_p256_addcarryx_u32(&x104, &x105, x103, x57, x87);
+  fiat_p256_addcarryx_u32(&x106, &x107, x105, x59, x89);
+  fiat_p256_addcarryx_u32(&x108, &x109, x107, x61, x91);
+  fiat_p256_addcarryx_u32(&x110, &x111, x109, x63, x93);
+  fiat_p256_addcarryx_u32(&x112, &x113, x111, x65, x95);
+  fiat_p256_addcarryx_u32(&x114, &x115, x113, x67, x97);
+  fiat_p256_addcarryx_u32(&x116, &x117, x115, x69, x99);
+  fiat_p256_addcarryx_u32(&x118, &x119, x117, x70, x101);
+  fiat_p256_mulx_u32(&x120, &x121, x102, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x122, &x123, x102, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x124, &x125, x102, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x126, &x127, x102, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x128, &x129, 0x0, x127, x124);
+  fiat_p256_addcarryx_u32(&x130, &x131, x129, x125, x122);
+  x132 = (x131 + x123);
+  fiat_p256_addcarryx_u32(&x133, &x134, 0x0, x102, x126);
+  fiat_p256_addcarryx_u32(&x135, &x136, x134, x104, x128);
+  fiat_p256_addcarryx_u32(&x137, &x138, x136, x106, x130);
+  fiat_p256_addcarryx_u32(&x139, &x140, x138, x108, x132);
+  fiat_p256_addcarryx_u32(&x141, &x142, x140, x110, 0x0);
+  fiat_p256_addcarryx_u32(&x143, &x144, x142, x112, 0x0);
+  fiat_p256_addcarryx_u32(&x145, &x146, x144, x114, x102);
+  fiat_p256_addcarryx_u32(&x147, &x148, x146, x116, x120);
+  fiat_p256_addcarryx_u32(&x149, &x150, x148, x118, x121);
+  x151 = ((uint32_t)x150 + x119);
+  fiat_p256_mulx_u32(&x152, &x153, x2, (arg1[7]));
+  fiat_p256_mulx_u32(&x154, &x155, x2, (arg1[6]));
+  fiat_p256_mulx_u32(&x156, &x157, x2, (arg1[5]));
+  fiat_p256_mulx_u32(&x158, &x159, x2, (arg1[4]));
+  fiat_p256_mulx_u32(&x160, &x161, x2, (arg1[3]));
+  fiat_p256_mulx_u32(&x162, &x163, x2, (arg1[2]));
+  fiat_p256_mulx_u32(&x164, &x165, x2, (arg1[1]));
+  fiat_p256_mulx_u32(&x166, &x167, x2, (arg1[0]));
+  fiat_p256_addcarryx_u32(&x168, &x169, 0x0, x167, x164);
+  fiat_p256_addcarryx_u32(&x170, &x171, x169, x165, x162);
+  fiat_p256_addcarryx_u32(&x172, &x173, x171, x163, x160);
+  fiat_p256_addcarryx_u32(&x174, &x175, x173, x161, x158);
+  fiat_p256_addcarryx_u32(&x176, &x177, x175, x159, x156);
+  fiat_p256_addcarryx_u32(&x178, &x179, x177, x157, x154);
+  fiat_p256_addcarryx_u32(&x180, &x181, x179, x155, x152);
+  x182 = (x181 + x153);
+  fiat_p256_addcarryx_u32(&x183, &x184, 0x0, x135, x166);
+  fiat_p256_addcarryx_u32(&x185, &x186, x184, x137, x168);
+  fiat_p256_addcarryx_u32(&x187, &x188, x186, x139, x170);
+  fiat_p256_addcarryx_u32(&x189, &x190, x188, x141, x172);
+  fiat_p256_addcarryx_u32(&x191, &x192, x190, x143, x174);
+  fiat_p256_addcarryx_u32(&x193, &x194, x192, x145, x176);
+  fiat_p256_addcarryx_u32(&x195, &x196, x194, x147, x178);
+  fiat_p256_addcarryx_u32(&x197, &x198, x196, x149, x180);
+  fiat_p256_addcarryx_u32(&x199, &x200, x198, x151, x182);
+  fiat_p256_mulx_u32(&x201, &x202, x183, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x203, &x204, x183, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x205, &x206, x183, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x207, &x208, x183, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x209, &x210, 0x0, x208, x205);
+  fiat_p256_addcarryx_u32(&x211, &x212, x210, x206, x203);
+  x213 = (x212 + x204);
+  fiat_p256_addcarryx_u32(&x214, &x215, 0x0, x183, x207);
+  fiat_p256_addcarryx_u32(&x216, &x217, x215, x185, x209);
+  fiat_p256_addcarryx_u32(&x218, &x219, x217, x187, x211);
+  fiat_p256_addcarryx_u32(&x220, &x221, x219, x189, x213);
+  fiat_p256_addcarryx_u32(&x222, &x223, x221, x191, 0x0);
+  fiat_p256_addcarryx_u32(&x224, &x225, x223, x193, 0x0);
+  fiat_p256_addcarryx_u32(&x226, &x227, x225, x195, x183);
+  fiat_p256_addcarryx_u32(&x228, &x229, x227, x197, x201);
+  fiat_p256_addcarryx_u32(&x230, &x231, x229, x199, x202);
+  x232 = ((uint32_t)x231 + x200);
+  fiat_p256_mulx_u32(&x233, &x234, x3, (arg1[7]));
+  fiat_p256_mulx_u32(&x235, &x236, x3, (arg1[6]));
+  fiat_p256_mulx_u32(&x237, &x238, x3, (arg1[5]));
+  fiat_p256_mulx_u32(&x239, &x240, x3, (arg1[4]));
+  fiat_p256_mulx_u32(&x241, &x242, x3, (arg1[3]));
+  fiat_p256_mulx_u32(&x243, &x244, x3, (arg1[2]));
+  fiat_p256_mulx_u32(&x245, &x246, x3, (arg1[1]));
+  fiat_p256_mulx_u32(&x247, &x248, x3, (arg1[0]));
+  fiat_p256_addcarryx_u32(&x249, &x250, 0x0, x248, x245);
+  fiat_p256_addcarryx_u32(&x251, &x252, x250, x246, x243);
+  fiat_p256_addcarryx_u32(&x253, &x254, x252, x244, x241);
+  fiat_p256_addcarryx_u32(&x255, &x256, x254, x242, x239);
+  fiat_p256_addcarryx_u32(&x257, &x258, x256, x240, x237);
+  fiat_p256_addcarryx_u32(&x259, &x260, x258, x238, x235);
+  fiat_p256_addcarryx_u32(&x261, &x262, x260, x236, x233);
+  x263 = (x262 + x234);
+  fiat_p256_addcarryx_u32(&x264, &x265, 0x0, x216, x247);
+  fiat_p256_addcarryx_u32(&x266, &x267, x265, x218, x249);
+  fiat_p256_addcarryx_u32(&x268, &x269, x267, x220, x251);
+  fiat_p256_addcarryx_u32(&x270, &x271, x269, x222, x253);
+  fiat_p256_addcarryx_u32(&x272, &x273, x271, x224, x255);
+  fiat_p256_addcarryx_u32(&x274, &x275, x273, x226, x257);
+  fiat_p256_addcarryx_u32(&x276, &x277, x275, x228, x259);
+  fiat_p256_addcarryx_u32(&x278, &x279, x277, x230, x261);
+  fiat_p256_addcarryx_u32(&x280, &x281, x279, x232, x263);
+  fiat_p256_mulx_u32(&x282, &x283, x264, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x284, &x285, x264, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x286, &x287, x264, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x288, &x289, x264, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x290, &x291, 0x0, x289, x286);
+  fiat_p256_addcarryx_u32(&x292, &x293, x291, x287, x284);
+  x294 = (x293 + x285);
+  fiat_p256_addcarryx_u32(&x295, &x296, 0x0, x264, x288);
+  fiat_p256_addcarryx_u32(&x297, &x298, x296, x266, x290);
+  fiat_p256_addcarryx_u32(&x299, &x300, x298, x268, x292);
+  fiat_p256_addcarryx_u32(&x301, &x302, x300, x270, x294);
+  fiat_p256_addcarryx_u32(&x303, &x304, x302, x272, 0x0);
+  fiat_p256_addcarryx_u32(&x305, &x306, x304, x274, 0x0);
+  fiat_p256_addcarryx_u32(&x307, &x308, x306, x276, x264);
+  fiat_p256_addcarryx_u32(&x309, &x310, x308, x278, x282);
+  fiat_p256_addcarryx_u32(&x311, &x312, x310, x280, x283);
+  x313 = ((uint32_t)x312 + x281);
+  fiat_p256_mulx_u32(&x314, &x315, x4, (arg1[7]));
+  fiat_p256_mulx_u32(&x316, &x317, x4, (arg1[6]));
+  fiat_p256_mulx_u32(&x318, &x319, x4, (arg1[5]));
+  fiat_p256_mulx_u32(&x320, &x321, x4, (arg1[4]));
+  fiat_p256_mulx_u32(&x322, &x323, x4, (arg1[3]));
+  fiat_p256_mulx_u32(&x324, &x325, x4, (arg1[2]));
+  fiat_p256_mulx_u32(&x326, &x327, x4, (arg1[1]));
+  fiat_p256_mulx_u32(&x328, &x329, x4, (arg1[0]));
+  fiat_p256_addcarryx_u32(&x330, &x331, 0x0, x329, x326);
+  fiat_p256_addcarryx_u32(&x332, &x333, x331, x327, x324);
+  fiat_p256_addcarryx_u32(&x334, &x335, x333, x325, x322);
+  fiat_p256_addcarryx_u32(&x336, &x337, x335, x323, x320);
+  fiat_p256_addcarryx_u32(&x338, &x339, x337, x321, x318);
+  fiat_p256_addcarryx_u32(&x340, &x341, x339, x319, x316);
+  fiat_p256_addcarryx_u32(&x342, &x343, x341, x317, x314);
+  x344 = (x343 + x315);
+  fiat_p256_addcarryx_u32(&x345, &x346, 0x0, x297, x328);
+  fiat_p256_addcarryx_u32(&x347, &x348, x346, x299, x330);
+  fiat_p256_addcarryx_u32(&x349, &x350, x348, x301, x332);
+  fiat_p256_addcarryx_u32(&x351, &x352, x350, x303, x334);
+  fiat_p256_addcarryx_u32(&x353, &x354, x352, x305, x336);
+  fiat_p256_addcarryx_u32(&x355, &x356, x354, x307, x338);
+  fiat_p256_addcarryx_u32(&x357, &x358, x356, x309, x340);
+  fiat_p256_addcarryx_u32(&x359, &x360, x358, x311, x342);
+  fiat_p256_addcarryx_u32(&x361, &x362, x360, x313, x344);
+  fiat_p256_mulx_u32(&x363, &x364, x345, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x365, &x366, x345, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x367, &x368, x345, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x369, &x370, x345, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x371, &x372, 0x0, x370, x367);
+  fiat_p256_addcarryx_u32(&x373, &x374, x372, x368, x365);
+  x375 = (x374 + x366);
+  fiat_p256_addcarryx_u32(&x376, &x377, 0x0, x345, x369);
+  fiat_p256_addcarryx_u32(&x378, &x379, x377, x347, x371);
+  fiat_p256_addcarryx_u32(&x380, &x381, x379, x349, x373);
+  fiat_p256_addcarryx_u32(&x382, &x383, x381, x351, x375);
+  fiat_p256_addcarryx_u32(&x384, &x385, x383, x353, 0x0);
+  fiat_p256_addcarryx_u32(&x386, &x387, x385, x355, 0x0);
+  fiat_p256_addcarryx_u32(&x388, &x389, x387, x357, x345);
+  fiat_p256_addcarryx_u32(&x390, &x391, x389, x359, x363);
+  fiat_p256_addcarryx_u32(&x392, &x393, x391, x361, x364);
+  x394 = ((uint32_t)x393 + x362);
+  fiat_p256_mulx_u32(&x395, &x396, x5, (arg1[7]));
+  fiat_p256_mulx_u32(&x397, &x398, x5, (arg1[6]));
+  fiat_p256_mulx_u32(&x399, &x400, x5, (arg1[5]));
+  fiat_p256_mulx_u32(&x401, &x402, x5, (arg1[4]));
+  fiat_p256_mulx_u32(&x403, &x404, x5, (arg1[3]));
+  fiat_p256_mulx_u32(&x405, &x406, x5, (arg1[2]));
+  fiat_p256_mulx_u32(&x407, &x408, x5, (arg1[1]));
+  fiat_p256_mulx_u32(&x409, &x410, x5, (arg1[0]));
+  fiat_p256_addcarryx_u32(&x411, &x412, 0x0, x410, x407);
+  fiat_p256_addcarryx_u32(&x413, &x414, x412, x408, x405);
+  fiat_p256_addcarryx_u32(&x415, &x416, x414, x406, x403);
+  fiat_p256_addcarryx_u32(&x417, &x418, x416, x404, x401);
+  fiat_p256_addcarryx_u32(&x419, &x420, x418, x402, x399);
+  fiat_p256_addcarryx_u32(&x421, &x422, x420, x400, x397);
+  fiat_p256_addcarryx_u32(&x423, &x424, x422, x398, x395);
+  x425 = (x424 + x396);
+  fiat_p256_addcarryx_u32(&x426, &x427, 0x0, x378, x409);
+  fiat_p256_addcarryx_u32(&x428, &x429, x427, x380, x411);
+  fiat_p256_addcarryx_u32(&x430, &x431, x429, x382, x413);
+  fiat_p256_addcarryx_u32(&x432, &x433, x431, x384, x415);
+  fiat_p256_addcarryx_u32(&x434, &x435, x433, x386, x417);
+  fiat_p256_addcarryx_u32(&x436, &x437, x435, x388, x419);
+  fiat_p256_addcarryx_u32(&x438, &x439, x437, x390, x421);
+  fiat_p256_addcarryx_u32(&x440, &x441, x439, x392, x423);
+  fiat_p256_addcarryx_u32(&x442, &x443, x441, x394, x425);
+  fiat_p256_mulx_u32(&x444, &x445, x426, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x446, &x447, x426, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x448, &x449, x426, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x450, &x451, x426, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x452, &x453, 0x0, x451, x448);
+  fiat_p256_addcarryx_u32(&x454, &x455, x453, x449, x446);
+  x456 = (x455 + x447);
+  fiat_p256_addcarryx_u32(&x457, &x458, 0x0, x426, x450);
+  fiat_p256_addcarryx_u32(&x459, &x460, x458, x428, x452);
+  fiat_p256_addcarryx_u32(&x461, &x462, x460, x430, x454);
+  fiat_p256_addcarryx_u32(&x463, &x464, x462, x432, x456);
+  fiat_p256_addcarryx_u32(&x465, &x466, x464, x434, 0x0);
+  fiat_p256_addcarryx_u32(&x467, &x468, x466, x436, 0x0);
+  fiat_p256_addcarryx_u32(&x469, &x470, x468, x438, x426);
+  fiat_p256_addcarryx_u32(&x471, &x472, x470, x440, x444);
+  fiat_p256_addcarryx_u32(&x473, &x474, x472, x442, x445);
+  x475 = ((uint32_t)x474 + x443);
+  fiat_p256_mulx_u32(&x476, &x477, x6, (arg1[7]));
+  fiat_p256_mulx_u32(&x478, &x479, x6, (arg1[6]));
+  fiat_p256_mulx_u32(&x480, &x481, x6, (arg1[5]));
+  fiat_p256_mulx_u32(&x482, &x483, x6, (arg1[4]));
+  fiat_p256_mulx_u32(&x484, &x485, x6, (arg1[3]));
+  fiat_p256_mulx_u32(&x486, &x487, x6, (arg1[2]));
+  fiat_p256_mulx_u32(&x488, &x489, x6, (arg1[1]));
+  fiat_p256_mulx_u32(&x490, &x491, x6, (arg1[0]));
+  fiat_p256_addcarryx_u32(&x492, &x493, 0x0, x491, x488);
+  fiat_p256_addcarryx_u32(&x494, &x495, x493, x489, x486);
+  fiat_p256_addcarryx_u32(&x496, &x497, x495, x487, x484);
+  fiat_p256_addcarryx_u32(&x498, &x499, x497, x485, x482);
+  fiat_p256_addcarryx_u32(&x500, &x501, x499, x483, x480);
+  fiat_p256_addcarryx_u32(&x502, &x503, x501, x481, x478);
+  fiat_p256_addcarryx_u32(&x504, &x505, x503, x479, x476);
+  x506 = (x505 + x477);
+  fiat_p256_addcarryx_u32(&x507, &x508, 0x0, x459, x490);
+  fiat_p256_addcarryx_u32(&x509, &x510, x508, x461, x492);
+  fiat_p256_addcarryx_u32(&x511, &x512, x510, x463, x494);
+  fiat_p256_addcarryx_u32(&x513, &x514, x512, x465, x496);
+  fiat_p256_addcarryx_u32(&x515, &x516, x514, x467, x498);
+  fiat_p256_addcarryx_u32(&x517, &x518, x516, x469, x500);
+  fiat_p256_addcarryx_u32(&x519, &x520, x518, x471, x502);
+  fiat_p256_addcarryx_u32(&x521, &x522, x520, x473, x504);
+  fiat_p256_addcarryx_u32(&x523, &x524, x522, x475, x506);
+  fiat_p256_mulx_u32(&x525, &x526, x507, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x527, &x528, x507, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x529, &x530, x507, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x531, &x532, x507, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x533, &x534, 0x0, x532, x529);
+  fiat_p256_addcarryx_u32(&x535, &x536, x534, x530, x527);
+  x537 = (x536 + x528);
+  fiat_p256_addcarryx_u32(&x538, &x539, 0x0, x507, x531);
+  fiat_p256_addcarryx_u32(&x540, &x541, x539, x509, x533);
+  fiat_p256_addcarryx_u32(&x542, &x543, x541, x511, x535);
+  fiat_p256_addcarryx_u32(&x544, &x545, x543, x513, x537);
+  fiat_p256_addcarryx_u32(&x546, &x547, x545, x515, 0x0);
+  fiat_p256_addcarryx_u32(&x548, &x549, x547, x517, 0x0);
+  fiat_p256_addcarryx_u32(&x550, &x551, x549, x519, x507);
+  fiat_p256_addcarryx_u32(&x552, &x553, x551, x521, x525);
+  fiat_p256_addcarryx_u32(&x554, &x555, x553, x523, x526);
+  x556 = ((uint32_t)x555 + x524);
+  fiat_p256_mulx_u32(&x557, &x558, x7, (arg1[7]));
+  fiat_p256_mulx_u32(&x559, &x560, x7, (arg1[6]));
+  fiat_p256_mulx_u32(&x561, &x562, x7, (arg1[5]));
+  fiat_p256_mulx_u32(&x563, &x564, x7, (arg1[4]));
+  fiat_p256_mulx_u32(&x565, &x566, x7, (arg1[3]));
+  fiat_p256_mulx_u32(&x567, &x568, x7, (arg1[2]));
+  fiat_p256_mulx_u32(&x569, &x570, x7, (arg1[1]));
+  fiat_p256_mulx_u32(&x571, &x572, x7, (arg1[0]));
+  fiat_p256_addcarryx_u32(&x573, &x574, 0x0, x572, x569);
+  fiat_p256_addcarryx_u32(&x575, &x576, x574, x570, x567);
+  fiat_p256_addcarryx_u32(&x577, &x578, x576, x568, x565);
+  fiat_p256_addcarryx_u32(&x579, &x580, x578, x566, x563);
+  fiat_p256_addcarryx_u32(&x581, &x582, x580, x564, x561);
+  fiat_p256_addcarryx_u32(&x583, &x584, x582, x562, x559);
+  fiat_p256_addcarryx_u32(&x585, &x586, x584, x560, x557);
+  x587 = (x586 + x558);
+  fiat_p256_addcarryx_u32(&x588, &x589, 0x0, x540, x571);
+  fiat_p256_addcarryx_u32(&x590, &x591, x589, x542, x573);
+  fiat_p256_addcarryx_u32(&x592, &x593, x591, x544, x575);
+  fiat_p256_addcarryx_u32(&x594, &x595, x593, x546, x577);
+  fiat_p256_addcarryx_u32(&x596, &x597, x595, x548, x579);
+  fiat_p256_addcarryx_u32(&x598, &x599, x597, x550, x581);
+  fiat_p256_addcarryx_u32(&x600, &x601, x599, x552, x583);
+  fiat_p256_addcarryx_u32(&x602, &x603, x601, x554, x585);
+  fiat_p256_addcarryx_u32(&x604, &x605, x603, x556, x587);
+  fiat_p256_mulx_u32(&x606, &x607, x588, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x608, &x609, x588, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x610, &x611, x588, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x612, &x613, x588, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x614, &x615, 0x0, x613, x610);
+  fiat_p256_addcarryx_u32(&x616, &x617, x615, x611, x608);
+  x618 = (x617 + x609);
+  fiat_p256_addcarryx_u32(&x619, &x620, 0x0, x588, x612);
+  fiat_p256_addcarryx_u32(&x621, &x622, x620, x590, x614);
+  fiat_p256_addcarryx_u32(&x623, &x624, x622, x592, x616);
+  fiat_p256_addcarryx_u32(&x625, &x626, x624, x594, x618);
+  fiat_p256_addcarryx_u32(&x627, &x628, x626, x596, 0x0);
+  fiat_p256_addcarryx_u32(&x629, &x630, x628, x598, 0x0);
+  fiat_p256_addcarryx_u32(&x631, &x632, x630, x600, x588);
+  fiat_p256_addcarryx_u32(&x633, &x634, x632, x602, x606);
+  fiat_p256_addcarryx_u32(&x635, &x636, x634, x604, x607);
+  x637 = ((uint32_t)x636 + x605);
+  fiat_p256_subborrowx_u32(&x638, &x639, 0x0, x621, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x640, &x641, x639, x623, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x642, &x643, x641, x625, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x644, &x645, x643, x627, 0x0);
+  fiat_p256_subborrowx_u32(&x646, &x647, x645, x629, 0x0);
+  fiat_p256_subborrowx_u32(&x648, &x649, x647, x631, 0x0);
+  fiat_p256_subborrowx_u32(&x650, &x651, x649, x633, 0x1);
+  fiat_p256_subborrowx_u32(&x652, &x653, x651, x635, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x654, &x655, x653, x637, 0x0);
+  fiat_p256_cmovznz_u32(&x656, x655, x638, x621);
+  fiat_p256_cmovznz_u32(&x657, x655, x640, x623);
+  fiat_p256_cmovznz_u32(&x658, x655, x642, x625);
+  fiat_p256_cmovznz_u32(&x659, x655, x644, x627);
+  fiat_p256_cmovznz_u32(&x660, x655, x646, x629);
+  fiat_p256_cmovznz_u32(&x661, x655, x648, x631);
+  fiat_p256_cmovznz_u32(&x662, x655, x650, x633);
   fiat_p256_cmovznz_u32(&x663, x655, x652, x635);
   out1[0] = x656;
   out1[1] = x657;
@@ -2128,6 +2221,7 @@
 
 /*
  * The function fiat_p256_add adds two field elements in the Montgomery domain.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  *   0 ≤ eval arg2 < m
@@ -2135,79 +2229,74 @@
  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
  *   0 ≤ eval out1 < m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
- *   arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
  */
-static void fiat_p256_add(uint32_t out1[8], const uint32_t arg1[8], const uint32_t arg2[8]) {
+static FIAT_P256_FIAT_INLINE void fiat_p256_add(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
   uint32_t x1;
   fiat_p256_uint1 x2;
-  fiat_p256_addcarryx_u32(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
   uint32_t x3;
   fiat_p256_uint1 x4;
-  fiat_p256_addcarryx_u32(&x3, &x4, x2, (arg1[1]), (arg2[1]));
   uint32_t x5;
   fiat_p256_uint1 x6;
-  fiat_p256_addcarryx_u32(&x5, &x6, x4, (arg1[2]), (arg2[2]));
   uint32_t x7;
   fiat_p256_uint1 x8;
-  fiat_p256_addcarryx_u32(&x7, &x8, x6, (arg1[3]), (arg2[3]));
   uint32_t x9;
   fiat_p256_uint1 x10;
-  fiat_p256_addcarryx_u32(&x9, &x10, x8, (arg1[4]), (arg2[4]));
   uint32_t x11;
   fiat_p256_uint1 x12;
-  fiat_p256_addcarryx_u32(&x11, &x12, x10, (arg1[5]), (arg2[5]));
   uint32_t x13;
   fiat_p256_uint1 x14;
-  fiat_p256_addcarryx_u32(&x13, &x14, x12, (arg1[6]), (arg2[6]));
   uint32_t x15;
   fiat_p256_uint1 x16;
-  fiat_p256_addcarryx_u32(&x15, &x16, x14, (arg1[7]), (arg2[7]));
   uint32_t x17;
   fiat_p256_uint1 x18;
-  fiat_p256_subborrowx_u32(&x17, &x18, 0x0, x1, UINT32_C(0xffffffff));
   uint32_t x19;
   fiat_p256_uint1 x20;
-  fiat_p256_subborrowx_u32(&x19, &x20, x18, x3, UINT32_C(0xffffffff));
   uint32_t x21;
   fiat_p256_uint1 x22;
-  fiat_p256_subborrowx_u32(&x21, &x22, x20, x5, UINT32_C(0xffffffff));
   uint32_t x23;
   fiat_p256_uint1 x24;
-  fiat_p256_subborrowx_u32(&x23, &x24, x22, x7, 0x0);
   uint32_t x25;
   fiat_p256_uint1 x26;
-  fiat_p256_subborrowx_u32(&x25, &x26, x24, x9, 0x0);
   uint32_t x27;
   fiat_p256_uint1 x28;
-  fiat_p256_subborrowx_u32(&x27, &x28, x26, x11, 0x0);
   uint32_t x29;
   fiat_p256_uint1 x30;
-  fiat_p256_subborrowx_u32(&x29, &x30, x28, x13, 0x1);
   uint32_t x31;
   fiat_p256_uint1 x32;
-  fiat_p256_subborrowx_u32(&x31, &x32, x30, x15, UINT32_C(0xffffffff));
   uint32_t x33;
   fiat_p256_uint1 x34;
-  fiat_p256_subborrowx_u32(&x33, &x34, x32, x16, 0x0);
   uint32_t x35;
-  fiat_p256_cmovznz_u32(&x35, x34, x17, x1);
   uint32_t x36;
-  fiat_p256_cmovznz_u32(&x36, x34, x19, x3);
   uint32_t x37;
-  fiat_p256_cmovznz_u32(&x37, x34, x21, x5);
   uint32_t x38;
-  fiat_p256_cmovznz_u32(&x38, x34, x23, x7);
   uint32_t x39;
-  fiat_p256_cmovznz_u32(&x39, x34, x25, x9);
   uint32_t x40;
-  fiat_p256_cmovznz_u32(&x40, x34, x27, x11);
   uint32_t x41;
-  fiat_p256_cmovznz_u32(&x41, x34, x29, x13);
   uint32_t x42;
+  fiat_p256_addcarryx_u32(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
+  fiat_p256_addcarryx_u32(&x3, &x4, x2, (arg1[1]), (arg2[1]));
+  fiat_p256_addcarryx_u32(&x5, &x6, x4, (arg1[2]), (arg2[2]));
+  fiat_p256_addcarryx_u32(&x7, &x8, x6, (arg1[3]), (arg2[3]));
+  fiat_p256_addcarryx_u32(&x9, &x10, x8, (arg1[4]), (arg2[4]));
+  fiat_p256_addcarryx_u32(&x11, &x12, x10, (arg1[5]), (arg2[5]));
+  fiat_p256_addcarryx_u32(&x13, &x14, x12, (arg1[6]), (arg2[6]));
+  fiat_p256_addcarryx_u32(&x15, &x16, x14, (arg1[7]), (arg2[7]));
+  fiat_p256_subborrowx_u32(&x17, &x18, 0x0, x1, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x19, &x20, x18, x3, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x21, &x22, x20, x5, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x23, &x24, x22, x7, 0x0);
+  fiat_p256_subborrowx_u32(&x25, &x26, x24, x9, 0x0);
+  fiat_p256_subborrowx_u32(&x27, &x28, x26, x11, 0x0);
+  fiat_p256_subborrowx_u32(&x29, &x30, x28, x13, 0x1);
+  fiat_p256_subborrowx_u32(&x31, &x32, x30, x15, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x33, &x34, x32, x16, 0x0);
+  fiat_p256_cmovznz_u32(&x35, x34, x17, x1);
+  fiat_p256_cmovznz_u32(&x36, x34, x19, x3);
+  fiat_p256_cmovznz_u32(&x37, x34, x21, x5);
+  fiat_p256_cmovznz_u32(&x38, x34, x23, x7);
+  fiat_p256_cmovznz_u32(&x39, x34, x25, x9);
+  fiat_p256_cmovznz_u32(&x40, x34, x27, x11);
+  fiat_p256_cmovznz_u32(&x41, x34, x29, x13);
   fiat_p256_cmovznz_u32(&x42, x34, x31, x15);
   out1[0] = x35;
   out1[1] = x36;
@@ -2221,6 +2310,7 @@
 
 /*
  * The function fiat_p256_sub subtracts two field elements in the Montgomery domain.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  *   0 ≤ eval arg2 < m
@@ -2228,63 +2318,58 @@
  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
  *   0 ≤ eval out1 < m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
- *   arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
  */
-static void fiat_p256_sub(uint32_t out1[8], const uint32_t arg1[8], const uint32_t arg2[8]) {
+static FIAT_P256_FIAT_INLINE void fiat_p256_sub(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
   uint32_t x1;
   fiat_p256_uint1 x2;
-  fiat_p256_subborrowx_u32(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
   uint32_t x3;
   fiat_p256_uint1 x4;
-  fiat_p256_subborrowx_u32(&x3, &x4, x2, (arg1[1]), (arg2[1]));
   uint32_t x5;
   fiat_p256_uint1 x6;
-  fiat_p256_subborrowx_u32(&x5, &x6, x4, (arg1[2]), (arg2[2]));
   uint32_t x7;
   fiat_p256_uint1 x8;
-  fiat_p256_subborrowx_u32(&x7, &x8, x6, (arg1[3]), (arg2[3]));
   uint32_t x9;
   fiat_p256_uint1 x10;
-  fiat_p256_subborrowx_u32(&x9, &x10, x8, (arg1[4]), (arg2[4]));
   uint32_t x11;
   fiat_p256_uint1 x12;
-  fiat_p256_subborrowx_u32(&x11, &x12, x10, (arg1[5]), (arg2[5]));
   uint32_t x13;
   fiat_p256_uint1 x14;
-  fiat_p256_subborrowx_u32(&x13, &x14, x12, (arg1[6]), (arg2[6]));
   uint32_t x15;
   fiat_p256_uint1 x16;
-  fiat_p256_subborrowx_u32(&x15, &x16, x14, (arg1[7]), (arg2[7]));
   uint32_t x17;
-  fiat_p256_cmovznz_u32(&x17, x16, 0x0, UINT32_C(0xffffffff));
   uint32_t x18;
   fiat_p256_uint1 x19;
-  fiat_p256_addcarryx_u32(&x18, &x19, 0x0, x1, (x17 & UINT32_C(0xffffffff)));
   uint32_t x20;
   fiat_p256_uint1 x21;
-  fiat_p256_addcarryx_u32(&x20, &x21, x19, x3, (x17 & UINT32_C(0xffffffff)));
   uint32_t x22;
   fiat_p256_uint1 x23;
-  fiat_p256_addcarryx_u32(&x22, &x23, x21, x5, (x17 & UINT32_C(0xffffffff)));
   uint32_t x24;
   fiat_p256_uint1 x25;
-  fiat_p256_addcarryx_u32(&x24, &x25, x23, x7, 0x0);
   uint32_t x26;
   fiat_p256_uint1 x27;
-  fiat_p256_addcarryx_u32(&x26, &x27, x25, x9, 0x0);
   uint32_t x28;
   fiat_p256_uint1 x29;
-  fiat_p256_addcarryx_u32(&x28, &x29, x27, x11, 0x0);
   uint32_t x30;
   fiat_p256_uint1 x31;
-  fiat_p256_addcarryx_u32(&x30, &x31, x29, x13, (fiat_p256_uint1)(x17 & 0x1));
   uint32_t x32;
   fiat_p256_uint1 x33;
-  fiat_p256_addcarryx_u32(&x32, &x33, x31, x15, (x17 & UINT32_C(0xffffffff)));
+  fiat_p256_subborrowx_u32(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
+  fiat_p256_subborrowx_u32(&x3, &x4, x2, (arg1[1]), (arg2[1]));
+  fiat_p256_subborrowx_u32(&x5, &x6, x4, (arg1[2]), (arg2[2]));
+  fiat_p256_subborrowx_u32(&x7, &x8, x6, (arg1[3]), (arg2[3]));
+  fiat_p256_subborrowx_u32(&x9, &x10, x8, (arg1[4]), (arg2[4]));
+  fiat_p256_subborrowx_u32(&x11, &x12, x10, (arg1[5]), (arg2[5]));
+  fiat_p256_subborrowx_u32(&x13, &x14, x12, (arg1[6]), (arg2[6]));
+  fiat_p256_subborrowx_u32(&x15, &x16, x14, (arg1[7]), (arg2[7]));
+  fiat_p256_cmovznz_u32(&x17, x16, 0x0, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x18, &x19, 0x0, x1, x17);
+  fiat_p256_addcarryx_u32(&x20, &x21, x19, x3, x17);
+  fiat_p256_addcarryx_u32(&x22, &x23, x21, x5, x17);
+  fiat_p256_addcarryx_u32(&x24, &x25, x23, x7, 0x0);
+  fiat_p256_addcarryx_u32(&x26, &x27, x25, x9, 0x0);
+  fiat_p256_addcarryx_u32(&x28, &x29, x27, x11, 0x0);
+  fiat_p256_addcarryx_u32(&x30, &x31, x29, x13, (fiat_p256_uint1)(x17 & 0x1));
+  fiat_p256_addcarryx_u32(&x32, &x33, x31, x15, x17);
   out1[0] = x18;
   out1[1] = x20;
   out1[2] = x22;
@@ -2297,68 +2382,65 @@
 
 /*
  * The function fiat_p256_opp negates a field element in the Montgomery domain.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  * Postconditions:
  *   eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
  *   0 ≤ eval out1 < m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
  */
-static void fiat_p256_opp(uint32_t out1[8], const uint32_t arg1[8]) {
+static FIAT_P256_FIAT_INLINE void fiat_p256_opp(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) {
   uint32_t x1;
   fiat_p256_uint1 x2;
-  fiat_p256_subborrowx_u32(&x1, &x2, 0x0, 0x0, (arg1[0]));
   uint32_t x3;
   fiat_p256_uint1 x4;
-  fiat_p256_subborrowx_u32(&x3, &x4, x2, 0x0, (arg1[1]));
   uint32_t x5;
   fiat_p256_uint1 x6;
-  fiat_p256_subborrowx_u32(&x5, &x6, x4, 0x0, (arg1[2]));
   uint32_t x7;
   fiat_p256_uint1 x8;
-  fiat_p256_subborrowx_u32(&x7, &x8, x6, 0x0, (arg1[3]));
   uint32_t x9;
   fiat_p256_uint1 x10;
-  fiat_p256_subborrowx_u32(&x9, &x10, x8, 0x0, (arg1[4]));
   uint32_t x11;
   fiat_p256_uint1 x12;
-  fiat_p256_subborrowx_u32(&x11, &x12, x10, 0x0, (arg1[5]));
   uint32_t x13;
   fiat_p256_uint1 x14;
-  fiat_p256_subborrowx_u32(&x13, &x14, x12, 0x0, (arg1[6]));
   uint32_t x15;
   fiat_p256_uint1 x16;
-  fiat_p256_subborrowx_u32(&x15, &x16, x14, 0x0, (arg1[7]));
   uint32_t x17;
-  fiat_p256_cmovznz_u32(&x17, x16, 0x0, UINT32_C(0xffffffff));
   uint32_t x18;
   fiat_p256_uint1 x19;
-  fiat_p256_addcarryx_u32(&x18, &x19, 0x0, x1, (x17 & UINT32_C(0xffffffff)));
   uint32_t x20;
   fiat_p256_uint1 x21;
-  fiat_p256_addcarryx_u32(&x20, &x21, x19, x3, (x17 & UINT32_C(0xffffffff)));
   uint32_t x22;
   fiat_p256_uint1 x23;
-  fiat_p256_addcarryx_u32(&x22, &x23, x21, x5, (x17 & UINT32_C(0xffffffff)));
   uint32_t x24;
   fiat_p256_uint1 x25;
-  fiat_p256_addcarryx_u32(&x24, &x25, x23, x7, 0x0);
   uint32_t x26;
   fiat_p256_uint1 x27;
-  fiat_p256_addcarryx_u32(&x26, &x27, x25, x9, 0x0);
   uint32_t x28;
   fiat_p256_uint1 x29;
-  fiat_p256_addcarryx_u32(&x28, &x29, x27, x11, 0x0);
   uint32_t x30;
   fiat_p256_uint1 x31;
-  fiat_p256_addcarryx_u32(&x30, &x31, x29, x13, (fiat_p256_uint1)(x17 & 0x1));
   uint32_t x32;
   fiat_p256_uint1 x33;
-  fiat_p256_addcarryx_u32(&x32, &x33, x31, x15, (x17 & UINT32_C(0xffffffff)));
+  fiat_p256_subborrowx_u32(&x1, &x2, 0x0, 0x0, (arg1[0]));
+  fiat_p256_subborrowx_u32(&x3, &x4, x2, 0x0, (arg1[1]));
+  fiat_p256_subborrowx_u32(&x5, &x6, x4, 0x0, (arg1[2]));
+  fiat_p256_subborrowx_u32(&x7, &x8, x6, 0x0, (arg1[3]));
+  fiat_p256_subborrowx_u32(&x9, &x10, x8, 0x0, (arg1[4]));
+  fiat_p256_subborrowx_u32(&x11, &x12, x10, 0x0, (arg1[5]));
+  fiat_p256_subborrowx_u32(&x13, &x14, x12, 0x0, (arg1[6]));
+  fiat_p256_subborrowx_u32(&x15, &x16, x14, 0x0, (arg1[7]));
+  fiat_p256_cmovznz_u32(&x17, x16, 0x0, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x18, &x19, 0x0, x1, x17);
+  fiat_p256_addcarryx_u32(&x20, &x21, x19, x3, x17);
+  fiat_p256_addcarryx_u32(&x22, &x23, x21, x5, x17);
+  fiat_p256_addcarryx_u32(&x24, &x25, x23, x7, 0x0);
+  fiat_p256_addcarryx_u32(&x26, &x27, x25, x9, 0x0);
+  fiat_p256_addcarryx_u32(&x28, &x29, x27, x11, 0x0);
+  fiat_p256_addcarryx_u32(&x30, &x31, x29, x13, (fiat_p256_uint1)(x17 & 0x1));
+  fiat_p256_addcarryx_u32(&x32, &x33, x31, x15, x17);
   out1[0] = x18;
   out1[1] = x20;
   out1[2] = x22;
@@ -2371,532 +2453,530 @@
 
 /*
  * The function fiat_p256_from_montgomery translates a field element out of the Montgomery domain.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  * Postconditions:
  *   eval out1 mod m = (eval arg1 * ((2^32)⁻¹ mod m)^8) mod m
  *   0 ≤ eval out1 < m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
  */
-static void fiat_p256_from_montgomery(uint32_t out1[8], const uint32_t arg1[8]) {
-  uint32_t x1 = (arg1[0]);
+static FIAT_P256_FIAT_INLINE void fiat_p256_from_montgomery(fiat_p256_non_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) {
+  uint32_t x1;
   uint32_t x2;
   uint32_t x3;
-  fiat_p256_mulx_u32(&x2, &x3, x1, UINT32_C(0xffffffff));
   uint32_t x4;
   uint32_t x5;
-  fiat_p256_mulx_u32(&x4, &x5, x1, UINT32_C(0xffffffff));
   uint32_t x6;
   uint32_t x7;
-  fiat_p256_mulx_u32(&x6, &x7, x1, UINT32_C(0xffffffff));
   uint32_t x8;
   uint32_t x9;
-  fiat_p256_mulx_u32(&x8, &x9, x1, UINT32_C(0xffffffff));
   uint32_t x10;
   fiat_p256_uint1 x11;
-  fiat_p256_addcarryx_u32(&x10, &x11, 0x0, x9, x6);
   uint32_t x12;
   fiat_p256_uint1 x13;
-  fiat_p256_addcarryx_u32(&x12, &x13, x11, x7, x4);
   uint32_t x14;
   fiat_p256_uint1 x15;
-  fiat_p256_addcarryx_u32(&x14, &x15, 0x0, x1, x8);
   uint32_t x16;
   fiat_p256_uint1 x17;
-  fiat_p256_addcarryx_u32(&x16, &x17, x15, 0x0, x10);
   uint32_t x18;
   fiat_p256_uint1 x19;
-  fiat_p256_addcarryx_u32(&x18, &x19, x17, 0x0, x12);
   uint32_t x20;
   fiat_p256_uint1 x21;
-  fiat_p256_addcarryx_u32(&x20, &x21, x19, 0x0, (x13 + x5));
   uint32_t x22;
   fiat_p256_uint1 x23;
-  fiat_p256_addcarryx_u32(&x22, &x23, 0x0, x16, (arg1[1]));
   uint32_t x24;
   fiat_p256_uint1 x25;
-  fiat_p256_addcarryx_u32(&x24, &x25, x23, x18, 0x0);
   uint32_t x26;
   fiat_p256_uint1 x27;
-  fiat_p256_addcarryx_u32(&x26, &x27, x25, x20, 0x0);
   uint32_t x28;
   uint32_t x29;
-  fiat_p256_mulx_u32(&x28, &x29, x22, UINT32_C(0xffffffff));
   uint32_t x30;
   uint32_t x31;
-  fiat_p256_mulx_u32(&x30, &x31, x22, UINT32_C(0xffffffff));
   uint32_t x32;
   uint32_t x33;
-  fiat_p256_mulx_u32(&x32, &x33, x22, UINT32_C(0xffffffff));
   uint32_t x34;
   uint32_t x35;
-  fiat_p256_mulx_u32(&x34, &x35, x22, UINT32_C(0xffffffff));
   uint32_t x36;
   fiat_p256_uint1 x37;
-  fiat_p256_addcarryx_u32(&x36, &x37, 0x0, x35, x32);
   uint32_t x38;
   fiat_p256_uint1 x39;
-  fiat_p256_addcarryx_u32(&x38, &x39, x37, x33, x30);
   uint32_t x40;
   fiat_p256_uint1 x41;
-  fiat_p256_addcarryx_u32(&x40, &x41, 0x0, x22, x34);
   uint32_t x42;
   fiat_p256_uint1 x43;
-  fiat_p256_addcarryx_u32(&x42, &x43, x41, x24, x36);
   uint32_t x44;
   fiat_p256_uint1 x45;
-  fiat_p256_addcarryx_u32(&x44, &x45, x43, x26, x38);
   uint32_t x46;
   fiat_p256_uint1 x47;
-  fiat_p256_addcarryx_u32(&x46, &x47, x45, ((uint32_t)x27 + x21), (x39 + x31));
   uint32_t x48;
   fiat_p256_uint1 x49;
-  fiat_p256_addcarryx_u32(&x48, &x49, 0x0, x2, x22);
   uint32_t x50;
   fiat_p256_uint1 x51;
-  fiat_p256_addcarryx_u32(&x50, &x51, x49, x3, x28);
   uint32_t x52;
   fiat_p256_uint1 x53;
-  fiat_p256_addcarryx_u32(&x52, &x53, 0x0, x42, (arg1[2]));
   uint32_t x54;
   fiat_p256_uint1 x55;
-  fiat_p256_addcarryx_u32(&x54, &x55, x53, x44, 0x0);
   uint32_t x56;
   fiat_p256_uint1 x57;
-  fiat_p256_addcarryx_u32(&x56, &x57, x55, x46, 0x0);
   uint32_t x58;
   uint32_t x59;
-  fiat_p256_mulx_u32(&x58, &x59, x52, UINT32_C(0xffffffff));
   uint32_t x60;
   uint32_t x61;
-  fiat_p256_mulx_u32(&x60, &x61, x52, UINT32_C(0xffffffff));
   uint32_t x62;
   uint32_t x63;
-  fiat_p256_mulx_u32(&x62, &x63, x52, UINT32_C(0xffffffff));
   uint32_t x64;
   uint32_t x65;
-  fiat_p256_mulx_u32(&x64, &x65, x52, UINT32_C(0xffffffff));
   uint32_t x66;
   fiat_p256_uint1 x67;
-  fiat_p256_addcarryx_u32(&x66, &x67, 0x0, x65, x62);
   uint32_t x68;
   fiat_p256_uint1 x69;
-  fiat_p256_addcarryx_u32(&x68, &x69, x67, x63, x60);
   uint32_t x70;
   fiat_p256_uint1 x71;
-  fiat_p256_addcarryx_u32(&x70, &x71, 0x0, x52, x64);
   uint32_t x72;
   fiat_p256_uint1 x73;
-  fiat_p256_addcarryx_u32(&x72, &x73, x71, x54, x66);
   uint32_t x74;
   fiat_p256_uint1 x75;
-  fiat_p256_addcarryx_u32(&x74, &x75, x73, x56, x68);
   uint32_t x76;
   fiat_p256_uint1 x77;
-  fiat_p256_addcarryx_u32(&x76, &x77, x75, ((uint32_t)x57 + x47), (x69 + x61));
   uint32_t x78;
   fiat_p256_uint1 x79;
-  fiat_p256_addcarryx_u32(&x78, &x79, x77, x1, 0x0);
   uint32_t x80;
   fiat_p256_uint1 x81;
-  fiat_p256_addcarryx_u32(&x80, &x81, x79, x48, 0x0);
   uint32_t x82;
   fiat_p256_uint1 x83;
-  fiat_p256_addcarryx_u32(&x82, &x83, x81, x50, x52);
   uint32_t x84;
   fiat_p256_uint1 x85;
-  fiat_p256_addcarryx_u32(&x84, &x85, x83, (x51 + x29), x58);
   uint32_t x86;
   fiat_p256_uint1 x87;
-  fiat_p256_addcarryx_u32(&x86, &x87, 0x0, x72, (arg1[3]));
   uint32_t x88;
   fiat_p256_uint1 x89;
-  fiat_p256_addcarryx_u32(&x88, &x89, x87, x74, 0x0);
   uint32_t x90;
   fiat_p256_uint1 x91;
-  fiat_p256_addcarryx_u32(&x90, &x91, x89, x76, 0x0);
   uint32_t x92;
   fiat_p256_uint1 x93;
-  fiat_p256_addcarryx_u32(&x92, &x93, x91, x78, 0x0);
   uint32_t x94;
   fiat_p256_uint1 x95;
-  fiat_p256_addcarryx_u32(&x94, &x95, x93, x80, 0x0);
   uint32_t x96;
   fiat_p256_uint1 x97;
-  fiat_p256_addcarryx_u32(&x96, &x97, x95, x82, 0x0);
   uint32_t x98;
   fiat_p256_uint1 x99;
-  fiat_p256_addcarryx_u32(&x98, &x99, x97, x84, 0x0);
   uint32_t x100;
   fiat_p256_uint1 x101;
-  fiat_p256_addcarryx_u32(&x100, &x101, x99, (x85 + x59), 0x0);
   uint32_t x102;
   uint32_t x103;
-  fiat_p256_mulx_u32(&x102, &x103, x86, UINT32_C(0xffffffff));
   uint32_t x104;
   uint32_t x105;
-  fiat_p256_mulx_u32(&x104, &x105, x86, UINT32_C(0xffffffff));
   uint32_t x106;
   uint32_t x107;
-  fiat_p256_mulx_u32(&x106, &x107, x86, UINT32_C(0xffffffff));
   uint32_t x108;
   uint32_t x109;
-  fiat_p256_mulx_u32(&x108, &x109, x86, UINT32_C(0xffffffff));
   uint32_t x110;
   fiat_p256_uint1 x111;
-  fiat_p256_addcarryx_u32(&x110, &x111, 0x0, x109, x106);
   uint32_t x112;
   fiat_p256_uint1 x113;
-  fiat_p256_addcarryx_u32(&x112, &x113, x111, x107, x104);
   uint32_t x114;
   fiat_p256_uint1 x115;
-  fiat_p256_addcarryx_u32(&x114, &x115, 0x0, x86, x108);
   uint32_t x116;
   fiat_p256_uint1 x117;
-  fiat_p256_addcarryx_u32(&x116, &x117, x115, x88, x110);
   uint32_t x118;
   fiat_p256_uint1 x119;
-  fiat_p256_addcarryx_u32(&x118, &x119, x117, x90, x112);
   uint32_t x120;
   fiat_p256_uint1 x121;
-  fiat_p256_addcarryx_u32(&x120, &x121, x119, x92, (x113 + x105));
   uint32_t x122;
   fiat_p256_uint1 x123;
-  fiat_p256_addcarryx_u32(&x122, &x123, x121, x94, 0x0);
   uint32_t x124;
   fiat_p256_uint1 x125;
-  fiat_p256_addcarryx_u32(&x124, &x125, x123, x96, 0x0);
   uint32_t x126;
   fiat_p256_uint1 x127;
-  fiat_p256_addcarryx_u32(&x126, &x127, x125, x98, x86);
   uint32_t x128;
   fiat_p256_uint1 x129;
-  fiat_p256_addcarryx_u32(&x128, &x129, x127, x100, x102);
   uint32_t x130;
   fiat_p256_uint1 x131;
-  fiat_p256_addcarryx_u32(&x130, &x131, x129, x101, x103);
   uint32_t x132;
   fiat_p256_uint1 x133;
-  fiat_p256_addcarryx_u32(&x132, &x133, 0x0, x116, (arg1[4]));
   uint32_t x134;
   fiat_p256_uint1 x135;
-  fiat_p256_addcarryx_u32(&x134, &x135, x133, x118, 0x0);
   uint32_t x136;
   fiat_p256_uint1 x137;
-  fiat_p256_addcarryx_u32(&x136, &x137, x135, x120, 0x0);
   uint32_t x138;
   fiat_p256_uint1 x139;
-  fiat_p256_addcarryx_u32(&x138, &x139, x137, x122, 0x0);
   uint32_t x140;
   fiat_p256_uint1 x141;
-  fiat_p256_addcarryx_u32(&x140, &x141, x139, x124, 0x0);
   uint32_t x142;
   fiat_p256_uint1 x143;
-  fiat_p256_addcarryx_u32(&x142, &x143, x141, x126, 0x0);
   uint32_t x144;
   fiat_p256_uint1 x145;
-  fiat_p256_addcarryx_u32(&x144, &x145, x143, x128, 0x0);
   uint32_t x146;
   fiat_p256_uint1 x147;
-  fiat_p256_addcarryx_u32(&x146, &x147, x145, x130, 0x0);
   uint32_t x148;
   uint32_t x149;
-  fiat_p256_mulx_u32(&x148, &x149, x132, UINT32_C(0xffffffff));
   uint32_t x150;
   uint32_t x151;
-  fiat_p256_mulx_u32(&x150, &x151, x132, UINT32_C(0xffffffff));
   uint32_t x152;
   uint32_t x153;
-  fiat_p256_mulx_u32(&x152, &x153, x132, UINT32_C(0xffffffff));
   uint32_t x154;
   uint32_t x155;
-  fiat_p256_mulx_u32(&x154, &x155, x132, UINT32_C(0xffffffff));
   uint32_t x156;
   fiat_p256_uint1 x157;
-  fiat_p256_addcarryx_u32(&x156, &x157, 0x0, x155, x152);
   uint32_t x158;
   fiat_p256_uint1 x159;
-  fiat_p256_addcarryx_u32(&x158, &x159, x157, x153, x150);
   uint32_t x160;
   fiat_p256_uint1 x161;
-  fiat_p256_addcarryx_u32(&x160, &x161, 0x0, x132, x154);
   uint32_t x162;
   fiat_p256_uint1 x163;
-  fiat_p256_addcarryx_u32(&x162, &x163, x161, x134, x156);
   uint32_t x164;
   fiat_p256_uint1 x165;
-  fiat_p256_addcarryx_u32(&x164, &x165, x163, x136, x158);
   uint32_t x166;
   fiat_p256_uint1 x167;
-  fiat_p256_addcarryx_u32(&x166, &x167, x165, x138, (x159 + x151));
   uint32_t x168;
   fiat_p256_uint1 x169;
-  fiat_p256_addcarryx_u32(&x168, &x169, x167, x140, 0x0);
   uint32_t x170;
   fiat_p256_uint1 x171;
-  fiat_p256_addcarryx_u32(&x170, &x171, x169, x142, 0x0);
   uint32_t x172;
   fiat_p256_uint1 x173;
-  fiat_p256_addcarryx_u32(&x172, &x173, x171, x144, x132);
   uint32_t x174;
   fiat_p256_uint1 x175;
-  fiat_p256_addcarryx_u32(&x174, &x175, x173, x146, x148);
   uint32_t x176;
   fiat_p256_uint1 x177;
-  fiat_p256_addcarryx_u32(&x176, &x177, x175, ((uint32_t)x147 + x131), x149);
   uint32_t x178;
   fiat_p256_uint1 x179;
-  fiat_p256_addcarryx_u32(&x178, &x179, 0x0, x162, (arg1[5]));
   uint32_t x180;
   fiat_p256_uint1 x181;
-  fiat_p256_addcarryx_u32(&x180, &x181, x179, x164, 0x0);
   uint32_t x182;
   fiat_p256_uint1 x183;
-  fiat_p256_addcarryx_u32(&x182, &x183, x181, x166, 0x0);
   uint32_t x184;
   fiat_p256_uint1 x185;
-  fiat_p256_addcarryx_u32(&x184, &x185, x183, x168, 0x0);
   uint32_t x186;
   fiat_p256_uint1 x187;
-  fiat_p256_addcarryx_u32(&x186, &x187, x185, x170, 0x0);
   uint32_t x188;
   fiat_p256_uint1 x189;
-  fiat_p256_addcarryx_u32(&x188, &x189, x187, x172, 0x0);
   uint32_t x190;
   fiat_p256_uint1 x191;
-  fiat_p256_addcarryx_u32(&x190, &x191, x189, x174, 0x0);
   uint32_t x192;
   fiat_p256_uint1 x193;
-  fiat_p256_addcarryx_u32(&x192, &x193, x191, x176, 0x0);
   uint32_t x194;
   uint32_t x195;
-  fiat_p256_mulx_u32(&x194, &x195, x178, UINT32_C(0xffffffff));
   uint32_t x196;
   uint32_t x197;
-  fiat_p256_mulx_u32(&x196, &x197, x178, UINT32_C(0xffffffff));
   uint32_t x198;
   uint32_t x199;
-  fiat_p256_mulx_u32(&x198, &x199, x178, UINT32_C(0xffffffff));
   uint32_t x200;
   uint32_t x201;
-  fiat_p256_mulx_u32(&x200, &x201, x178, UINT32_C(0xffffffff));
   uint32_t x202;
   fiat_p256_uint1 x203;
-  fiat_p256_addcarryx_u32(&x202, &x203, 0x0, x201, x198);
   uint32_t x204;
   fiat_p256_uint1 x205;
-  fiat_p256_addcarryx_u32(&x204, &x205, x203, x199, x196);
   uint32_t x206;
   fiat_p256_uint1 x207;
-  fiat_p256_addcarryx_u32(&x206, &x207, 0x0, x178, x200);
   uint32_t x208;
   fiat_p256_uint1 x209;
-  fiat_p256_addcarryx_u32(&x208, &x209, x207, x180, x202);
   uint32_t x210;
   fiat_p256_uint1 x211;
-  fiat_p256_addcarryx_u32(&x210, &x211, x209, x182, x204);
   uint32_t x212;
   fiat_p256_uint1 x213;
-  fiat_p256_addcarryx_u32(&x212, &x213, x211, x184, (x205 + x197));
   uint32_t x214;
   fiat_p256_uint1 x215;
-  fiat_p256_addcarryx_u32(&x214, &x215, x213, x186, 0x0);
   uint32_t x216;
   fiat_p256_uint1 x217;
-  fiat_p256_addcarryx_u32(&x216, &x217, x215, x188, 0x0);
   uint32_t x218;
   fiat_p256_uint1 x219;
-  fiat_p256_addcarryx_u32(&x218, &x219, x217, x190, x178);
   uint32_t x220;
   fiat_p256_uint1 x221;
-  fiat_p256_addcarryx_u32(&x220, &x221, x219, x192, x194);
   uint32_t x222;
   fiat_p256_uint1 x223;
-  fiat_p256_addcarryx_u32(&x222, &x223, x221, ((uint32_t)x193 + x177), x195);
   uint32_t x224;
   fiat_p256_uint1 x225;
-  fiat_p256_addcarryx_u32(&x224, &x225, 0x0, x208, (arg1[6]));
   uint32_t x226;
   fiat_p256_uint1 x227;
-  fiat_p256_addcarryx_u32(&x226, &x227, x225, x210, 0x0);
   uint32_t x228;
   fiat_p256_uint1 x229;
-  fiat_p256_addcarryx_u32(&x228, &x229, x227, x212, 0x0);
   uint32_t x230;
   fiat_p256_uint1 x231;
-  fiat_p256_addcarryx_u32(&x230, &x231, x229, x214, 0x0);
   uint32_t x232;
   fiat_p256_uint1 x233;
-  fiat_p256_addcarryx_u32(&x232, &x233, x231, x216, 0x0);
   uint32_t x234;
   fiat_p256_uint1 x235;
-  fiat_p256_addcarryx_u32(&x234, &x235, x233, x218, 0x0);
   uint32_t x236;
   fiat_p256_uint1 x237;
-  fiat_p256_addcarryx_u32(&x236, &x237, x235, x220, 0x0);
   uint32_t x238;
   fiat_p256_uint1 x239;
-  fiat_p256_addcarryx_u32(&x238, &x239, x237, x222, 0x0);
   uint32_t x240;
   uint32_t x241;
-  fiat_p256_mulx_u32(&x240, &x241, x224, UINT32_C(0xffffffff));
   uint32_t x242;
   uint32_t x243;
-  fiat_p256_mulx_u32(&x242, &x243, x224, UINT32_C(0xffffffff));
   uint32_t x244;
   uint32_t x245;
-  fiat_p256_mulx_u32(&x244, &x245, x224, UINT32_C(0xffffffff));
   uint32_t x246;
   uint32_t x247;
-  fiat_p256_mulx_u32(&x246, &x247, x224, UINT32_C(0xffffffff));
   uint32_t x248;
   fiat_p256_uint1 x249;
-  fiat_p256_addcarryx_u32(&x248, &x249, 0x0, x247, x244);
   uint32_t x250;
   fiat_p256_uint1 x251;
-  fiat_p256_addcarryx_u32(&x250, &x251, x249, x245, x242);
   uint32_t x252;
   fiat_p256_uint1 x253;
-  fiat_p256_addcarryx_u32(&x252, &x253, 0x0, x224, x246);
   uint32_t x254;
   fiat_p256_uint1 x255;
-  fiat_p256_addcarryx_u32(&x254, &x255, x253, x226, x248);
   uint32_t x256;
   fiat_p256_uint1 x257;
-  fiat_p256_addcarryx_u32(&x256, &x257, x255, x228, x250);
   uint32_t x258;
   fiat_p256_uint1 x259;
-  fiat_p256_addcarryx_u32(&x258, &x259, x257, x230, (x251 + x243));
   uint32_t x260;
   fiat_p256_uint1 x261;
-  fiat_p256_addcarryx_u32(&x260, &x261, x259, x232, 0x0);
   uint32_t x262;
   fiat_p256_uint1 x263;
-  fiat_p256_addcarryx_u32(&x262, &x263, x261, x234, 0x0);
   uint32_t x264;
   fiat_p256_uint1 x265;
-  fiat_p256_addcarryx_u32(&x264, &x265, x263, x236, x224);
   uint32_t x266;
   fiat_p256_uint1 x267;
-  fiat_p256_addcarryx_u32(&x266, &x267, x265, x238, x240);
   uint32_t x268;
   fiat_p256_uint1 x269;
-  fiat_p256_addcarryx_u32(&x268, &x269, x267, ((uint32_t)x239 + x223), x241);
   uint32_t x270;
   fiat_p256_uint1 x271;
-  fiat_p256_addcarryx_u32(&x270, &x271, 0x0, x254, (arg1[7]));
   uint32_t x272;
   fiat_p256_uint1 x273;
-  fiat_p256_addcarryx_u32(&x272, &x273, x271, x256, 0x0);
   uint32_t x274;
   fiat_p256_uint1 x275;
-  fiat_p256_addcarryx_u32(&x274, &x275, x273, x258, 0x0);
   uint32_t x276;
   fiat_p256_uint1 x277;
-  fiat_p256_addcarryx_u32(&x276, &x277, x275, x260, 0x0);
   uint32_t x278;
   fiat_p256_uint1 x279;
-  fiat_p256_addcarryx_u32(&x278, &x279, x277, x262, 0x0);
   uint32_t x280;
   fiat_p256_uint1 x281;
-  fiat_p256_addcarryx_u32(&x280, &x281, x279, x264, 0x0);
   uint32_t x282;
   fiat_p256_uint1 x283;
-  fiat_p256_addcarryx_u32(&x282, &x283, x281, x266, 0x0);
   uint32_t x284;
   fiat_p256_uint1 x285;
-  fiat_p256_addcarryx_u32(&x284, &x285, x283, x268, 0x0);
   uint32_t x286;
   uint32_t x287;
-  fiat_p256_mulx_u32(&x286, &x287, x270, UINT32_C(0xffffffff));
   uint32_t x288;
   uint32_t x289;
-  fiat_p256_mulx_u32(&x288, &x289, x270, UINT32_C(0xffffffff));
   uint32_t x290;
   uint32_t x291;
-  fiat_p256_mulx_u32(&x290, &x291, x270, UINT32_C(0xffffffff));
   uint32_t x292;
   uint32_t x293;
-  fiat_p256_mulx_u32(&x292, &x293, x270, UINT32_C(0xffffffff));
   uint32_t x294;
   fiat_p256_uint1 x295;
-  fiat_p256_addcarryx_u32(&x294, &x295, 0x0, x293, x290);
   uint32_t x296;
   fiat_p256_uint1 x297;
-  fiat_p256_addcarryx_u32(&x296, &x297, x295, x291, x288);
   uint32_t x298;
   fiat_p256_uint1 x299;
-  fiat_p256_addcarryx_u32(&x298, &x299, 0x0, x270, x292);
   uint32_t x300;
   fiat_p256_uint1 x301;
-  fiat_p256_addcarryx_u32(&x300, &x301, x299, x272, x294);
   uint32_t x302;
   fiat_p256_uint1 x303;
-  fiat_p256_addcarryx_u32(&x302, &x303, x301, x274, x296);
   uint32_t x304;
   fiat_p256_uint1 x305;
-  fiat_p256_addcarryx_u32(&x304, &x305, x303, x276, (x297 + x289));
   uint32_t x306;
   fiat_p256_uint1 x307;
-  fiat_p256_addcarryx_u32(&x306, &x307, x305, x278, 0x0);
   uint32_t x308;
   fiat_p256_uint1 x309;
-  fiat_p256_addcarryx_u32(&x308, &x309, x307, x280, 0x0);
   uint32_t x310;
   fiat_p256_uint1 x311;
-  fiat_p256_addcarryx_u32(&x310, &x311, x309, x282, x270);
   uint32_t x312;
   fiat_p256_uint1 x313;
-  fiat_p256_addcarryx_u32(&x312, &x313, x311, x284, x286);
   uint32_t x314;
   fiat_p256_uint1 x315;
-  fiat_p256_addcarryx_u32(&x314, &x315, x313, ((uint32_t)x285 + x269), x287);
   uint32_t x316;
   fiat_p256_uint1 x317;
-  fiat_p256_subborrowx_u32(&x316, &x317, 0x0, x300, UINT32_C(0xffffffff));
   uint32_t x318;
   fiat_p256_uint1 x319;
-  fiat_p256_subborrowx_u32(&x318, &x319, x317, x302, UINT32_C(0xffffffff));
   uint32_t x320;
   fiat_p256_uint1 x321;
-  fiat_p256_subborrowx_u32(&x320, &x321, x319, x304, UINT32_C(0xffffffff));
   uint32_t x322;
   fiat_p256_uint1 x323;
-  fiat_p256_subborrowx_u32(&x322, &x323, x321, x306, 0x0);
   uint32_t x324;
   fiat_p256_uint1 x325;
-  fiat_p256_subborrowx_u32(&x324, &x325, x323, x308, 0x0);
   uint32_t x326;
   fiat_p256_uint1 x327;
-  fiat_p256_subborrowx_u32(&x326, &x327, x325, x310, 0x0);
   uint32_t x328;
   fiat_p256_uint1 x329;
-  fiat_p256_subborrowx_u32(&x328, &x329, x327, x312, 0x1);
   uint32_t x330;
   fiat_p256_uint1 x331;
-  fiat_p256_subborrowx_u32(&x330, &x331, x329, x314, UINT32_C(0xffffffff));
   uint32_t x332;
   fiat_p256_uint1 x333;
-  fiat_p256_subborrowx_u32(&x332, &x333, x331, x315, 0x0);
   uint32_t x334;
-  fiat_p256_cmovznz_u32(&x334, x333, x316, x300);
   uint32_t x335;
-  fiat_p256_cmovznz_u32(&x335, x333, x318, x302);
   uint32_t x336;
-  fiat_p256_cmovznz_u32(&x336, x333, x320, x304);
   uint32_t x337;
-  fiat_p256_cmovznz_u32(&x337, x333, x322, x306);
   uint32_t x338;
-  fiat_p256_cmovznz_u32(&x338, x333, x324, x308);
   uint32_t x339;
-  fiat_p256_cmovznz_u32(&x339, x333, x326, x310);
   uint32_t x340;
-  fiat_p256_cmovznz_u32(&x340, x333, x328, x312);
   uint32_t x341;
+  x1 = (arg1[0]);
+  fiat_p256_mulx_u32(&x2, &x3, x1, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x4, &x5, x1, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x6, &x7, x1, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x8, &x9, x1, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x10, &x11, 0x0, x9, x6);
+  fiat_p256_addcarryx_u32(&x12, &x13, x11, x7, x4);
+  fiat_p256_addcarryx_u32(&x14, &x15, 0x0, x1, x8);
+  fiat_p256_addcarryx_u32(&x16, &x17, x15, 0x0, x10);
+  fiat_p256_addcarryx_u32(&x18, &x19, x17, 0x0, x12);
+  fiat_p256_addcarryx_u32(&x20, &x21, x19, 0x0, (x13 + x5));
+  fiat_p256_addcarryx_u32(&x22, &x23, 0x0, x16, (arg1[1]));
+  fiat_p256_addcarryx_u32(&x24, &x25, x23, x18, 0x0);
+  fiat_p256_addcarryx_u32(&x26, &x27, x25, x20, 0x0);
+  fiat_p256_mulx_u32(&x28, &x29, x22, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x30, &x31, x22, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x32, &x33, x22, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x34, &x35, x22, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x36, &x37, 0x0, x35, x32);
+  fiat_p256_addcarryx_u32(&x38, &x39, x37, x33, x30);
+  fiat_p256_addcarryx_u32(&x40, &x41, 0x0, x22, x34);
+  fiat_p256_addcarryx_u32(&x42, &x43, x41, x24, x36);
+  fiat_p256_addcarryx_u32(&x44, &x45, x43, x26, x38);
+  fiat_p256_addcarryx_u32(&x46, &x47, x45, ((uint32_t)x27 + x21), (x39 + x31));
+  fiat_p256_addcarryx_u32(&x48, &x49, 0x0, x2, x22);
+  fiat_p256_addcarryx_u32(&x50, &x51, x49, x3, x28);
+  fiat_p256_addcarryx_u32(&x52, &x53, 0x0, x42, (arg1[2]));
+  fiat_p256_addcarryx_u32(&x54, &x55, x53, x44, 0x0);
+  fiat_p256_addcarryx_u32(&x56, &x57, x55, x46, 0x0);
+  fiat_p256_mulx_u32(&x58, &x59, x52, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x60, &x61, x52, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x62, &x63, x52, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x64, &x65, x52, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x66, &x67, 0x0, x65, x62);
+  fiat_p256_addcarryx_u32(&x68, &x69, x67, x63, x60);
+  fiat_p256_addcarryx_u32(&x70, &x71, 0x0, x52, x64);
+  fiat_p256_addcarryx_u32(&x72, &x73, x71, x54, x66);
+  fiat_p256_addcarryx_u32(&x74, &x75, x73, x56, x68);
+  fiat_p256_addcarryx_u32(&x76, &x77, x75, ((uint32_t)x57 + x47), (x69 + x61));
+  fiat_p256_addcarryx_u32(&x78, &x79, x77, x1, 0x0);
+  fiat_p256_addcarryx_u32(&x80, &x81, x79, x48, 0x0);
+  fiat_p256_addcarryx_u32(&x82, &x83, x81, x50, x52);
+  fiat_p256_addcarryx_u32(&x84, &x85, x83, (x51 + x29), x58);
+  fiat_p256_addcarryx_u32(&x86, &x87, 0x0, x72, (arg1[3]));
+  fiat_p256_addcarryx_u32(&x88, &x89, x87, x74, 0x0);
+  fiat_p256_addcarryx_u32(&x90, &x91, x89, x76, 0x0);
+  fiat_p256_addcarryx_u32(&x92, &x93, x91, x78, 0x0);
+  fiat_p256_addcarryx_u32(&x94, &x95, x93, x80, 0x0);
+  fiat_p256_addcarryx_u32(&x96, &x97, x95, x82, 0x0);
+  fiat_p256_addcarryx_u32(&x98, &x99, x97, x84, 0x0);
+  fiat_p256_addcarryx_u32(&x100, &x101, x99, (x85 + x59), 0x0);
+  fiat_p256_mulx_u32(&x102, &x103, x86, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x104, &x105, x86, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x106, &x107, x86, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x108, &x109, x86, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x110, &x111, 0x0, x109, x106);
+  fiat_p256_addcarryx_u32(&x112, &x113, x111, x107, x104);
+  fiat_p256_addcarryx_u32(&x114, &x115, 0x0, x86, x108);
+  fiat_p256_addcarryx_u32(&x116, &x117, x115, x88, x110);
+  fiat_p256_addcarryx_u32(&x118, &x119, x117, x90, x112);
+  fiat_p256_addcarryx_u32(&x120, &x121, x119, x92, (x113 + x105));
+  fiat_p256_addcarryx_u32(&x122, &x123, x121, x94, 0x0);
+  fiat_p256_addcarryx_u32(&x124, &x125, x123, x96, 0x0);
+  fiat_p256_addcarryx_u32(&x126, &x127, x125, x98, x86);
+  fiat_p256_addcarryx_u32(&x128, &x129, x127, x100, x102);
+  fiat_p256_addcarryx_u32(&x130, &x131, x129, x101, x103);
+  fiat_p256_addcarryx_u32(&x132, &x133, 0x0, x116, (arg1[4]));
+  fiat_p256_addcarryx_u32(&x134, &x135, x133, x118, 0x0);
+  fiat_p256_addcarryx_u32(&x136, &x137, x135, x120, 0x0);
+  fiat_p256_addcarryx_u32(&x138, &x139, x137, x122, 0x0);
+  fiat_p256_addcarryx_u32(&x140, &x141, x139, x124, 0x0);
+  fiat_p256_addcarryx_u32(&x142, &x143, x141, x126, 0x0);
+  fiat_p256_addcarryx_u32(&x144, &x145, x143, x128, 0x0);
+  fiat_p256_addcarryx_u32(&x146, &x147, x145, x130, 0x0);
+  fiat_p256_mulx_u32(&x148, &x149, x132, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x150, &x151, x132, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x152, &x153, x132, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x154, &x155, x132, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x156, &x157, 0x0, x155, x152);
+  fiat_p256_addcarryx_u32(&x158, &x159, x157, x153, x150);
+  fiat_p256_addcarryx_u32(&x160, &x161, 0x0, x132, x154);
+  fiat_p256_addcarryx_u32(&x162, &x163, x161, x134, x156);
+  fiat_p256_addcarryx_u32(&x164, &x165, x163, x136, x158);
+  fiat_p256_addcarryx_u32(&x166, &x167, x165, x138, (x159 + x151));
+  fiat_p256_addcarryx_u32(&x168, &x169, x167, x140, 0x0);
+  fiat_p256_addcarryx_u32(&x170, &x171, x169, x142, 0x0);
+  fiat_p256_addcarryx_u32(&x172, &x173, x171, x144, x132);
+  fiat_p256_addcarryx_u32(&x174, &x175, x173, x146, x148);
+  fiat_p256_addcarryx_u32(&x176, &x177, x175, ((uint32_t)x147 + x131), x149);
+  fiat_p256_addcarryx_u32(&x178, &x179, 0x0, x162, (arg1[5]));
+  fiat_p256_addcarryx_u32(&x180, &x181, x179, x164, 0x0);
+  fiat_p256_addcarryx_u32(&x182, &x183, x181, x166, 0x0);
+  fiat_p256_addcarryx_u32(&x184, &x185, x183, x168, 0x0);
+  fiat_p256_addcarryx_u32(&x186, &x187, x185, x170, 0x0);
+  fiat_p256_addcarryx_u32(&x188, &x189, x187, x172, 0x0);
+  fiat_p256_addcarryx_u32(&x190, &x191, x189, x174, 0x0);
+  fiat_p256_addcarryx_u32(&x192, &x193, x191, x176, 0x0);
+  fiat_p256_mulx_u32(&x194, &x195, x178, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x196, &x197, x178, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x198, &x199, x178, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x200, &x201, x178, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x202, &x203, 0x0, x201, x198);
+  fiat_p256_addcarryx_u32(&x204, &x205, x203, x199, x196);
+  fiat_p256_addcarryx_u32(&x206, &x207, 0x0, x178, x200);
+  fiat_p256_addcarryx_u32(&x208, &x209, x207, x180, x202);
+  fiat_p256_addcarryx_u32(&x210, &x211, x209, x182, x204);
+  fiat_p256_addcarryx_u32(&x212, &x213, x211, x184, (x205 + x197));
+  fiat_p256_addcarryx_u32(&x214, &x215, x213, x186, 0x0);
+  fiat_p256_addcarryx_u32(&x216, &x217, x215, x188, 0x0);
+  fiat_p256_addcarryx_u32(&x218, &x219, x217, x190, x178);
+  fiat_p256_addcarryx_u32(&x220, &x221, x219, x192, x194);
+  fiat_p256_addcarryx_u32(&x222, &x223, x221, ((uint32_t)x193 + x177), x195);
+  fiat_p256_addcarryx_u32(&x224, &x225, 0x0, x208, (arg1[6]));
+  fiat_p256_addcarryx_u32(&x226, &x227, x225, x210, 0x0);
+  fiat_p256_addcarryx_u32(&x228, &x229, x227, x212, 0x0);
+  fiat_p256_addcarryx_u32(&x230, &x231, x229, x214, 0x0);
+  fiat_p256_addcarryx_u32(&x232, &x233, x231, x216, 0x0);
+  fiat_p256_addcarryx_u32(&x234, &x235, x233, x218, 0x0);
+  fiat_p256_addcarryx_u32(&x236, &x237, x235, x220, 0x0);
+  fiat_p256_addcarryx_u32(&x238, &x239, x237, x222, 0x0);
+  fiat_p256_mulx_u32(&x240, &x241, x224, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x242, &x243, x224, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x244, &x245, x224, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x246, &x247, x224, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x248, &x249, 0x0, x247, x244);
+  fiat_p256_addcarryx_u32(&x250, &x251, x249, x245, x242);
+  fiat_p256_addcarryx_u32(&x252, &x253, 0x0, x224, x246);
+  fiat_p256_addcarryx_u32(&x254, &x255, x253, x226, x248);
+  fiat_p256_addcarryx_u32(&x256, &x257, x255, x228, x250);
+  fiat_p256_addcarryx_u32(&x258, &x259, x257, x230, (x251 + x243));
+  fiat_p256_addcarryx_u32(&x260, &x261, x259, x232, 0x0);
+  fiat_p256_addcarryx_u32(&x262, &x263, x261, x234, 0x0);
+  fiat_p256_addcarryx_u32(&x264, &x265, x263, x236, x224);
+  fiat_p256_addcarryx_u32(&x266, &x267, x265, x238, x240);
+  fiat_p256_addcarryx_u32(&x268, &x269, x267, ((uint32_t)x239 + x223), x241);
+  fiat_p256_addcarryx_u32(&x270, &x271, 0x0, x254, (arg1[7]));
+  fiat_p256_addcarryx_u32(&x272, &x273, x271, x256, 0x0);
+  fiat_p256_addcarryx_u32(&x274, &x275, x273, x258, 0x0);
+  fiat_p256_addcarryx_u32(&x276, &x277, x275, x260, 0x0);
+  fiat_p256_addcarryx_u32(&x278, &x279, x277, x262, 0x0);
+  fiat_p256_addcarryx_u32(&x280, &x281, x279, x264, 0x0);
+  fiat_p256_addcarryx_u32(&x282, &x283, x281, x266, 0x0);
+  fiat_p256_addcarryx_u32(&x284, &x285, x283, x268, 0x0);
+  fiat_p256_mulx_u32(&x286, &x287, x270, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x288, &x289, x270, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x290, &x291, x270, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x292, &x293, x270, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x294, &x295, 0x0, x293, x290);
+  fiat_p256_addcarryx_u32(&x296, &x297, x295, x291, x288);
+  fiat_p256_addcarryx_u32(&x298, &x299, 0x0, x270, x292);
+  fiat_p256_addcarryx_u32(&x300, &x301, x299, x272, x294);
+  fiat_p256_addcarryx_u32(&x302, &x303, x301, x274, x296);
+  fiat_p256_addcarryx_u32(&x304, &x305, x303, x276, (x297 + x289));
+  fiat_p256_addcarryx_u32(&x306, &x307, x305, x278, 0x0);
+  fiat_p256_addcarryx_u32(&x308, &x309, x307, x280, 0x0);
+  fiat_p256_addcarryx_u32(&x310, &x311, x309, x282, x270);
+  fiat_p256_addcarryx_u32(&x312, &x313, x311, x284, x286);
+  fiat_p256_addcarryx_u32(&x314, &x315, x313, ((uint32_t)x285 + x269), x287);
+  fiat_p256_subborrowx_u32(&x316, &x317, 0x0, x300, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x318, &x319, x317, x302, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x320, &x321, x319, x304, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x322, &x323, x321, x306, 0x0);
+  fiat_p256_subborrowx_u32(&x324, &x325, x323, x308, 0x0);
+  fiat_p256_subborrowx_u32(&x326, &x327, x325, x310, 0x0);
+  fiat_p256_subborrowx_u32(&x328, &x329, x327, x312, 0x1);
+  fiat_p256_subborrowx_u32(&x330, &x331, x329, x314, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x332, &x333, x331, x315, 0x0);
+  fiat_p256_cmovznz_u32(&x334, x333, x316, x300);
+  fiat_p256_cmovznz_u32(&x335, x333, x318, x302);
+  fiat_p256_cmovznz_u32(&x336, x333, x320, x304);
+  fiat_p256_cmovznz_u32(&x337, x333, x322, x306);
+  fiat_p256_cmovznz_u32(&x338, x333, x324, x308);
+  fiat_p256_cmovznz_u32(&x339, x333, x326, x310);
+  fiat_p256_cmovznz_u32(&x340, x333, x328, x312);
   fiat_p256_cmovznz_u32(&x341, x333, x330, x314);
   out1[0] = x334;
   out1[1] = x335;
@@ -2909,7 +2989,904 @@
 }
 
 /*
+ * The function fiat_p256_to_montgomery translates a field element into the Montgomery domain.
+ *
+ * Preconditions:
+ *   0 ≤ eval arg1 < m
+ * Postconditions:
+ *   eval (from_montgomery out1) mod m = eval arg1 mod m
+ *   0 ≤ eval out1 < m
+ *
+ */
+static FIAT_P256_FIAT_INLINE void fiat_p256_to_montgomery(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_non_montgomery_domain_field_element arg1) {
+  uint32_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  uint32_t x4;
+  uint32_t x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint32_t x8;
+  uint32_t x9;
+  uint32_t x10;
+  uint32_t x11;
+  uint32_t x12;
+  uint32_t x13;
+  uint32_t x14;
+  uint32_t x15;
+  uint32_t x16;
+  uint32_t x17;
+  uint32_t x18;
+  uint32_t x19;
+  uint32_t x20;
+  uint32_t x21;
+  uint32_t x22;
+  uint32_t x23;
+  fiat_p256_uint1 x24;
+  uint32_t x25;
+  fiat_p256_uint1 x26;
+  uint32_t x27;
+  fiat_p256_uint1 x28;
+  uint32_t x29;
+  fiat_p256_uint1 x30;
+  uint32_t x31;
+  fiat_p256_uint1 x32;
+  uint32_t x33;
+  uint32_t x34;
+  uint32_t x35;
+  uint32_t x36;
+  uint32_t x37;
+  uint32_t x38;
+  uint32_t x39;
+  uint32_t x40;
+  uint32_t x41;
+  fiat_p256_uint1 x42;
+  uint32_t x43;
+  fiat_p256_uint1 x44;
+  uint32_t x45;
+  fiat_p256_uint1 x46;
+  uint32_t x47;
+  fiat_p256_uint1 x48;
+  uint32_t x49;
+  fiat_p256_uint1 x50;
+  uint32_t x51;
+  fiat_p256_uint1 x52;
+  uint32_t x53;
+  fiat_p256_uint1 x54;
+  uint32_t x55;
+  fiat_p256_uint1 x56;
+  uint32_t x57;
+  fiat_p256_uint1 x58;
+  uint32_t x59;
+  fiat_p256_uint1 x60;
+  uint32_t x61;
+  fiat_p256_uint1 x62;
+  uint32_t x63;
+  uint32_t x64;
+  uint32_t x65;
+  uint32_t x66;
+  uint32_t x67;
+  uint32_t x68;
+  uint32_t x69;
+  uint32_t x70;
+  uint32_t x71;
+  uint32_t x72;
+  uint32_t x73;
+  uint32_t x74;
+  uint32_t x75;
+  uint32_t x76;
+  uint32_t x77;
+  fiat_p256_uint1 x78;
+  uint32_t x79;
+  fiat_p256_uint1 x80;
+  uint32_t x81;
+  fiat_p256_uint1 x82;
+  uint32_t x83;
+  fiat_p256_uint1 x84;
+  uint32_t x85;
+  fiat_p256_uint1 x86;
+  uint32_t x87;
+  fiat_p256_uint1 x88;
+  uint32_t x89;
+  fiat_p256_uint1 x90;
+  uint32_t x91;
+  fiat_p256_uint1 x92;
+  uint32_t x93;
+  fiat_p256_uint1 x94;
+  uint32_t x95;
+  fiat_p256_uint1 x96;
+  uint32_t x97;
+  fiat_p256_uint1 x98;
+  uint32_t x99;
+  fiat_p256_uint1 x100;
+  uint32_t x101;
+  fiat_p256_uint1 x102;
+  uint32_t x103;
+  uint32_t x104;
+  uint32_t x105;
+  uint32_t x106;
+  uint32_t x107;
+  uint32_t x108;
+  uint32_t x109;
+  uint32_t x110;
+  uint32_t x111;
+  fiat_p256_uint1 x112;
+  uint32_t x113;
+  fiat_p256_uint1 x114;
+  uint32_t x115;
+  fiat_p256_uint1 x116;
+  uint32_t x117;
+  fiat_p256_uint1 x118;
+  uint32_t x119;
+  fiat_p256_uint1 x120;
+  uint32_t x121;
+  fiat_p256_uint1 x122;
+  uint32_t x123;
+  fiat_p256_uint1 x124;
+  uint32_t x125;
+  fiat_p256_uint1 x126;
+  uint32_t x127;
+  fiat_p256_uint1 x128;
+  uint32_t x129;
+  fiat_p256_uint1 x130;
+  uint32_t x131;
+  fiat_p256_uint1 x132;
+  uint32_t x133;
+  uint32_t x134;
+  uint32_t x135;
+  uint32_t x136;
+  uint32_t x137;
+  uint32_t x138;
+  uint32_t x139;
+  uint32_t x140;
+  uint32_t x141;
+  uint32_t x142;
+  uint32_t x143;
+  uint32_t x144;
+  uint32_t x145;
+  uint32_t x146;
+  uint32_t x147;
+  fiat_p256_uint1 x148;
+  uint32_t x149;
+  fiat_p256_uint1 x150;
+  uint32_t x151;
+  fiat_p256_uint1 x152;
+  uint32_t x153;
+  fiat_p256_uint1 x154;
+  uint32_t x155;
+  fiat_p256_uint1 x156;
+  uint32_t x157;
+  fiat_p256_uint1 x158;
+  uint32_t x159;
+  fiat_p256_uint1 x160;
+  uint32_t x161;
+  fiat_p256_uint1 x162;
+  uint32_t x163;
+  fiat_p256_uint1 x164;
+  uint32_t x165;
+  fiat_p256_uint1 x166;
+  uint32_t x167;
+  fiat_p256_uint1 x168;
+  uint32_t x169;
+  fiat_p256_uint1 x170;
+  uint32_t x171;
+  fiat_p256_uint1 x172;
+  uint32_t x173;
+  uint32_t x174;
+  uint32_t x175;
+  uint32_t x176;
+  uint32_t x177;
+  uint32_t x178;
+  uint32_t x179;
+  uint32_t x180;
+  uint32_t x181;
+  fiat_p256_uint1 x182;
+  uint32_t x183;
+  fiat_p256_uint1 x184;
+  uint32_t x185;
+  fiat_p256_uint1 x186;
+  uint32_t x187;
+  fiat_p256_uint1 x188;
+  uint32_t x189;
+  fiat_p256_uint1 x190;
+  uint32_t x191;
+  fiat_p256_uint1 x192;
+  uint32_t x193;
+  fiat_p256_uint1 x194;
+  uint32_t x195;
+  fiat_p256_uint1 x196;
+  uint32_t x197;
+  fiat_p256_uint1 x198;
+  uint32_t x199;
+  fiat_p256_uint1 x200;
+  uint32_t x201;
+  fiat_p256_uint1 x202;
+  uint32_t x203;
+  uint32_t x204;
+  uint32_t x205;
+  uint32_t x206;
+  uint32_t x207;
+  uint32_t x208;
+  uint32_t x209;
+  uint32_t x210;
+  uint32_t x211;
+  uint32_t x212;
+  uint32_t x213;
+  uint32_t x214;
+  uint32_t x215;
+  uint32_t x216;
+  uint32_t x217;
+  fiat_p256_uint1 x218;
+  uint32_t x219;
+  fiat_p256_uint1 x220;
+  uint32_t x221;
+  fiat_p256_uint1 x222;
+  uint32_t x223;
+  fiat_p256_uint1 x224;
+  uint32_t x225;
+  fiat_p256_uint1 x226;
+  uint32_t x227;
+  fiat_p256_uint1 x228;
+  uint32_t x229;
+  fiat_p256_uint1 x230;
+  uint32_t x231;
+  fiat_p256_uint1 x232;
+  uint32_t x233;
+  fiat_p256_uint1 x234;
+  uint32_t x235;
+  fiat_p256_uint1 x236;
+  uint32_t x237;
+  fiat_p256_uint1 x238;
+  uint32_t x239;
+  fiat_p256_uint1 x240;
+  uint32_t x241;
+  fiat_p256_uint1 x242;
+  uint32_t x243;
+  uint32_t x244;
+  uint32_t x245;
+  uint32_t x246;
+  uint32_t x247;
+  uint32_t x248;
+  uint32_t x249;
+  uint32_t x250;
+  uint32_t x251;
+  fiat_p256_uint1 x252;
+  uint32_t x253;
+  fiat_p256_uint1 x254;
+  uint32_t x255;
+  fiat_p256_uint1 x256;
+  uint32_t x257;
+  fiat_p256_uint1 x258;
+  uint32_t x259;
+  fiat_p256_uint1 x260;
+  uint32_t x261;
+  fiat_p256_uint1 x262;
+  uint32_t x263;
+  fiat_p256_uint1 x264;
+  uint32_t x265;
+  fiat_p256_uint1 x266;
+  uint32_t x267;
+  fiat_p256_uint1 x268;
+  uint32_t x269;
+  fiat_p256_uint1 x270;
+  uint32_t x271;
+  fiat_p256_uint1 x272;
+  uint32_t x273;
+  uint32_t x274;
+  uint32_t x275;
+  uint32_t x276;
+  uint32_t x277;
+  uint32_t x278;
+  uint32_t x279;
+  uint32_t x280;
+  uint32_t x281;
+  uint32_t x282;
+  uint32_t x283;
+  uint32_t x284;
+  uint32_t x285;
+  uint32_t x286;
+  uint32_t x287;
+  fiat_p256_uint1 x288;
+  uint32_t x289;
+  fiat_p256_uint1 x290;
+  uint32_t x291;
+  fiat_p256_uint1 x292;
+  uint32_t x293;
+  fiat_p256_uint1 x294;
+  uint32_t x295;
+  fiat_p256_uint1 x296;
+  uint32_t x297;
+  fiat_p256_uint1 x298;
+  uint32_t x299;
+  fiat_p256_uint1 x300;
+  uint32_t x301;
+  fiat_p256_uint1 x302;
+  uint32_t x303;
+  fiat_p256_uint1 x304;
+  uint32_t x305;
+  fiat_p256_uint1 x306;
+  uint32_t x307;
+  fiat_p256_uint1 x308;
+  uint32_t x309;
+  fiat_p256_uint1 x310;
+  uint32_t x311;
+  fiat_p256_uint1 x312;
+  uint32_t x313;
+  uint32_t x314;
+  uint32_t x315;
+  uint32_t x316;
+  uint32_t x317;
+  uint32_t x318;
+  uint32_t x319;
+  uint32_t x320;
+  uint32_t x321;
+  fiat_p256_uint1 x322;
+  uint32_t x323;
+  fiat_p256_uint1 x324;
+  uint32_t x325;
+  fiat_p256_uint1 x326;
+  uint32_t x327;
+  fiat_p256_uint1 x328;
+  uint32_t x329;
+  fiat_p256_uint1 x330;
+  uint32_t x331;
+  fiat_p256_uint1 x332;
+  uint32_t x333;
+  fiat_p256_uint1 x334;
+  uint32_t x335;
+  fiat_p256_uint1 x336;
+  uint32_t x337;
+  fiat_p256_uint1 x338;
+  uint32_t x339;
+  fiat_p256_uint1 x340;
+  uint32_t x341;
+  fiat_p256_uint1 x342;
+  uint32_t x343;
+  uint32_t x344;
+  uint32_t x345;
+  uint32_t x346;
+  uint32_t x347;
+  uint32_t x348;
+  uint32_t x349;
+  uint32_t x350;
+  uint32_t x351;
+  uint32_t x352;
+  uint32_t x353;
+  uint32_t x354;
+  uint32_t x355;
+  uint32_t x356;
+  uint32_t x357;
+  fiat_p256_uint1 x358;
+  uint32_t x359;
+  fiat_p256_uint1 x360;
+  uint32_t x361;
+  fiat_p256_uint1 x362;
+  uint32_t x363;
+  fiat_p256_uint1 x364;
+  uint32_t x365;
+  fiat_p256_uint1 x366;
+  uint32_t x367;
+  fiat_p256_uint1 x368;
+  uint32_t x369;
+  fiat_p256_uint1 x370;
+  uint32_t x371;
+  fiat_p256_uint1 x372;
+  uint32_t x373;
+  fiat_p256_uint1 x374;
+  uint32_t x375;
+  fiat_p256_uint1 x376;
+  uint32_t x377;
+  fiat_p256_uint1 x378;
+  uint32_t x379;
+  fiat_p256_uint1 x380;
+  uint32_t x381;
+  fiat_p256_uint1 x382;
+  uint32_t x383;
+  uint32_t x384;
+  uint32_t x385;
+  uint32_t x386;
+  uint32_t x387;
+  uint32_t x388;
+  uint32_t x389;
+  uint32_t x390;
+  uint32_t x391;
+  fiat_p256_uint1 x392;
+  uint32_t x393;
+  fiat_p256_uint1 x394;
+  uint32_t x395;
+  fiat_p256_uint1 x396;
+  uint32_t x397;
+  fiat_p256_uint1 x398;
+  uint32_t x399;
+  fiat_p256_uint1 x400;
+  uint32_t x401;
+  fiat_p256_uint1 x402;
+  uint32_t x403;
+  fiat_p256_uint1 x404;
+  uint32_t x405;
+  fiat_p256_uint1 x406;
+  uint32_t x407;
+  fiat_p256_uint1 x408;
+  uint32_t x409;
+  fiat_p256_uint1 x410;
+  uint32_t x411;
+  fiat_p256_uint1 x412;
+  uint32_t x413;
+  uint32_t x414;
+  uint32_t x415;
+  uint32_t x416;
+  uint32_t x417;
+  uint32_t x418;
+  uint32_t x419;
+  uint32_t x420;
+  uint32_t x421;
+  uint32_t x422;
+  uint32_t x423;
+  uint32_t x424;
+  uint32_t x425;
+  uint32_t x426;
+  uint32_t x427;
+  fiat_p256_uint1 x428;
+  uint32_t x429;
+  fiat_p256_uint1 x430;
+  uint32_t x431;
+  fiat_p256_uint1 x432;
+  uint32_t x433;
+  fiat_p256_uint1 x434;
+  uint32_t x435;
+  fiat_p256_uint1 x436;
+  uint32_t x437;
+  fiat_p256_uint1 x438;
+  uint32_t x439;
+  fiat_p256_uint1 x440;
+  uint32_t x441;
+  fiat_p256_uint1 x442;
+  uint32_t x443;
+  fiat_p256_uint1 x444;
+  uint32_t x445;
+  fiat_p256_uint1 x446;
+  uint32_t x447;
+  fiat_p256_uint1 x448;
+  uint32_t x449;
+  fiat_p256_uint1 x450;
+  uint32_t x451;
+  fiat_p256_uint1 x452;
+  uint32_t x453;
+  uint32_t x454;
+  uint32_t x455;
+  uint32_t x456;
+  uint32_t x457;
+  uint32_t x458;
+  uint32_t x459;
+  uint32_t x460;
+  uint32_t x461;
+  fiat_p256_uint1 x462;
+  uint32_t x463;
+  fiat_p256_uint1 x464;
+  uint32_t x465;
+  fiat_p256_uint1 x466;
+  uint32_t x467;
+  fiat_p256_uint1 x468;
+  uint32_t x469;
+  fiat_p256_uint1 x470;
+  uint32_t x471;
+  fiat_p256_uint1 x472;
+  uint32_t x473;
+  fiat_p256_uint1 x474;
+  uint32_t x475;
+  fiat_p256_uint1 x476;
+  uint32_t x477;
+  fiat_p256_uint1 x478;
+  uint32_t x479;
+  fiat_p256_uint1 x480;
+  uint32_t x481;
+  fiat_p256_uint1 x482;
+  uint32_t x483;
+  uint32_t x484;
+  uint32_t x485;
+  uint32_t x486;
+  uint32_t x487;
+  uint32_t x488;
+  uint32_t x489;
+  uint32_t x490;
+  uint32_t x491;
+  uint32_t x492;
+  uint32_t x493;
+  uint32_t x494;
+  uint32_t x495;
+  uint32_t x496;
+  uint32_t x497;
+  fiat_p256_uint1 x498;
+  uint32_t x499;
+  fiat_p256_uint1 x500;
+  uint32_t x501;
+  fiat_p256_uint1 x502;
+  uint32_t x503;
+  fiat_p256_uint1 x504;
+  uint32_t x505;
+  fiat_p256_uint1 x506;
+  uint32_t x507;
+  fiat_p256_uint1 x508;
+  uint32_t x509;
+  fiat_p256_uint1 x510;
+  uint32_t x511;
+  fiat_p256_uint1 x512;
+  uint32_t x513;
+  fiat_p256_uint1 x514;
+  uint32_t x515;
+  fiat_p256_uint1 x516;
+  uint32_t x517;
+  fiat_p256_uint1 x518;
+  uint32_t x519;
+  fiat_p256_uint1 x520;
+  uint32_t x521;
+  fiat_p256_uint1 x522;
+  uint32_t x523;
+  uint32_t x524;
+  uint32_t x525;
+  uint32_t x526;
+  uint32_t x527;
+  uint32_t x528;
+  uint32_t x529;
+  uint32_t x530;
+  uint32_t x531;
+  fiat_p256_uint1 x532;
+  uint32_t x533;
+  fiat_p256_uint1 x534;
+  uint32_t x535;
+  fiat_p256_uint1 x536;
+  uint32_t x537;
+  fiat_p256_uint1 x538;
+  uint32_t x539;
+  fiat_p256_uint1 x540;
+  uint32_t x541;
+  fiat_p256_uint1 x542;
+  uint32_t x543;
+  fiat_p256_uint1 x544;
+  uint32_t x545;
+  fiat_p256_uint1 x546;
+  uint32_t x547;
+  fiat_p256_uint1 x548;
+  uint32_t x549;
+  fiat_p256_uint1 x550;
+  uint32_t x551;
+  fiat_p256_uint1 x552;
+  uint32_t x553;
+  fiat_p256_uint1 x554;
+  uint32_t x555;
+  fiat_p256_uint1 x556;
+  uint32_t x557;
+  fiat_p256_uint1 x558;
+  uint32_t x559;
+  fiat_p256_uint1 x560;
+  uint32_t x561;
+  fiat_p256_uint1 x562;
+  uint32_t x563;
+  fiat_p256_uint1 x564;
+  uint32_t x565;
+  fiat_p256_uint1 x566;
+  uint32_t x567;
+  fiat_p256_uint1 x568;
+  uint32_t x569;
+  fiat_p256_uint1 x570;
+  uint32_t x571;
+  uint32_t x572;
+  uint32_t x573;
+  uint32_t x574;
+  uint32_t x575;
+  uint32_t x576;
+  uint32_t x577;
+  uint32_t x578;
+  x1 = (arg1[1]);
+  x2 = (arg1[2]);
+  x3 = (arg1[3]);
+  x4 = (arg1[4]);
+  x5 = (arg1[5]);
+  x6 = (arg1[6]);
+  x7 = (arg1[7]);
+  x8 = (arg1[0]);
+  fiat_p256_mulx_u32(&x9, &x10, x8, 0x4);
+  fiat_p256_mulx_u32(&x11, &x12, x8, UINT32_C(0xfffffffd));
+  fiat_p256_mulx_u32(&x13, &x14, x8, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x15, &x16, x8, UINT32_C(0xfffffffe));
+  fiat_p256_mulx_u32(&x17, &x18, x8, UINT32_C(0xfffffffb));
+  fiat_p256_mulx_u32(&x19, &x20, x8, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x21, &x22, x8, 0x3);
+  fiat_p256_addcarryx_u32(&x23, &x24, 0x0, x20, x17);
+  fiat_p256_addcarryx_u32(&x25, &x26, x24, x18, x15);
+  fiat_p256_addcarryx_u32(&x27, &x28, x26, x16, x13);
+  fiat_p256_addcarryx_u32(&x29, &x30, x28, x14, x11);
+  fiat_p256_addcarryx_u32(&x31, &x32, x30, x12, x9);
+  fiat_p256_mulx_u32(&x33, &x34, x21, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x35, &x36, x21, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x37, &x38, x21, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x39, &x40, x21, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x41, &x42, 0x0, x40, x37);
+  fiat_p256_addcarryx_u32(&x43, &x44, x42, x38, x35);
+  fiat_p256_addcarryx_u32(&x45, &x46, 0x0, x21, x39);
+  fiat_p256_addcarryx_u32(&x47, &x48, x46, x22, x41);
+  fiat_p256_addcarryx_u32(&x49, &x50, x48, x19, x43);
+  fiat_p256_addcarryx_u32(&x51, &x52, x50, x23, (x44 + x36));
+  fiat_p256_addcarryx_u32(&x53, &x54, x52, x25, 0x0);
+  fiat_p256_addcarryx_u32(&x55, &x56, x54, x27, 0x0);
+  fiat_p256_addcarryx_u32(&x57, &x58, x56, x29, x21);
+  fiat_p256_addcarryx_u32(&x59, &x60, x58, x31, x33);
+  fiat_p256_addcarryx_u32(&x61, &x62, x60, (x32 + x10), x34);
+  fiat_p256_mulx_u32(&x63, &x64, x1, 0x4);
+  fiat_p256_mulx_u32(&x65, &x66, x1, UINT32_C(0xfffffffd));
+  fiat_p256_mulx_u32(&x67, &x68, x1, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x69, &x70, x1, UINT32_C(0xfffffffe));
+  fiat_p256_mulx_u32(&x71, &x72, x1, UINT32_C(0xfffffffb));
+  fiat_p256_mulx_u32(&x73, &x74, x1, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x75, &x76, x1, 0x3);
+  fiat_p256_addcarryx_u32(&x77, &x78, 0x0, x74, x71);
+  fiat_p256_addcarryx_u32(&x79, &x80, x78, x72, x69);
+  fiat_p256_addcarryx_u32(&x81, &x82, x80, x70, x67);
+  fiat_p256_addcarryx_u32(&x83, &x84, x82, x68, x65);
+  fiat_p256_addcarryx_u32(&x85, &x86, x84, x66, x63);
+  fiat_p256_addcarryx_u32(&x87, &x88, 0x0, x47, x75);
+  fiat_p256_addcarryx_u32(&x89, &x90, x88, x49, x76);
+  fiat_p256_addcarryx_u32(&x91, &x92, x90, x51, x73);
+  fiat_p256_addcarryx_u32(&x93, &x94, x92, x53, x77);
+  fiat_p256_addcarryx_u32(&x95, &x96, x94, x55, x79);
+  fiat_p256_addcarryx_u32(&x97, &x98, x96, x57, x81);
+  fiat_p256_addcarryx_u32(&x99, &x100, x98, x59, x83);
+  fiat_p256_addcarryx_u32(&x101, &x102, x100, x61, x85);
+  fiat_p256_mulx_u32(&x103, &x104, x87, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x105, &x106, x87, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x107, &x108, x87, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x109, &x110, x87, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x111, &x112, 0x0, x110, x107);
+  fiat_p256_addcarryx_u32(&x113, &x114, x112, x108, x105);
+  fiat_p256_addcarryx_u32(&x115, &x116, 0x0, x87, x109);
+  fiat_p256_addcarryx_u32(&x117, &x118, x116, x89, x111);
+  fiat_p256_addcarryx_u32(&x119, &x120, x118, x91, x113);
+  fiat_p256_addcarryx_u32(&x121, &x122, x120, x93, (x114 + x106));
+  fiat_p256_addcarryx_u32(&x123, &x124, x122, x95, 0x0);
+  fiat_p256_addcarryx_u32(&x125, &x126, x124, x97, 0x0);
+  fiat_p256_addcarryx_u32(&x127, &x128, x126, x99, x87);
+  fiat_p256_addcarryx_u32(&x129, &x130, x128, x101, x103);
+  fiat_p256_addcarryx_u32(&x131, &x132, x130, (((uint32_t)x102 + x62) + (x86 + x64)), x104);
+  fiat_p256_mulx_u32(&x133, &x134, x2, 0x4);
+  fiat_p256_mulx_u32(&x135, &x136, x2, UINT32_C(0xfffffffd));
+  fiat_p256_mulx_u32(&x137, &x138, x2, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x139, &x140, x2, UINT32_C(0xfffffffe));
+  fiat_p256_mulx_u32(&x141, &x142, x2, UINT32_C(0xfffffffb));
+  fiat_p256_mulx_u32(&x143, &x144, x2, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x145, &x146, x2, 0x3);
+  fiat_p256_addcarryx_u32(&x147, &x148, 0x0, x144, x141);
+  fiat_p256_addcarryx_u32(&x149, &x150, x148, x142, x139);
+  fiat_p256_addcarryx_u32(&x151, &x152, x150, x140, x137);
+  fiat_p256_addcarryx_u32(&x153, &x154, x152, x138, x135);
+  fiat_p256_addcarryx_u32(&x155, &x156, x154, x136, x133);
+  fiat_p256_addcarryx_u32(&x157, &x158, 0x0, x117, x145);
+  fiat_p256_addcarryx_u32(&x159, &x160, x158, x119, x146);
+  fiat_p256_addcarryx_u32(&x161, &x162, x160, x121, x143);
+  fiat_p256_addcarryx_u32(&x163, &x164, x162, x123, x147);
+  fiat_p256_addcarryx_u32(&x165, &x166, x164, x125, x149);
+  fiat_p256_addcarryx_u32(&x167, &x168, x166, x127, x151);
+  fiat_p256_addcarryx_u32(&x169, &x170, x168, x129, x153);
+  fiat_p256_addcarryx_u32(&x171, &x172, x170, x131, x155);
+  fiat_p256_mulx_u32(&x173, &x174, x157, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x175, &x176, x157, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x177, &x178, x157, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x179, &x180, x157, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x181, &x182, 0x0, x180, x177);
+  fiat_p256_addcarryx_u32(&x183, &x184, x182, x178, x175);
+  fiat_p256_addcarryx_u32(&x185, &x186, 0x0, x157, x179);
+  fiat_p256_addcarryx_u32(&x187, &x188, x186, x159, x181);
+  fiat_p256_addcarryx_u32(&x189, &x190, x188, x161, x183);
+  fiat_p256_addcarryx_u32(&x191, &x192, x190, x163, (x184 + x176));
+  fiat_p256_addcarryx_u32(&x193, &x194, x192, x165, 0x0);
+  fiat_p256_addcarryx_u32(&x195, &x196, x194, x167, 0x0);
+  fiat_p256_addcarryx_u32(&x197, &x198, x196, x169, x157);
+  fiat_p256_addcarryx_u32(&x199, &x200, x198, x171, x173);
+  fiat_p256_addcarryx_u32(&x201, &x202, x200, (((uint32_t)x172 + x132) + (x156 + x134)), x174);
+  fiat_p256_mulx_u32(&x203, &x204, x3, 0x4);
+  fiat_p256_mulx_u32(&x205, &x206, x3, UINT32_C(0xfffffffd));
+  fiat_p256_mulx_u32(&x207, &x208, x3, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x209, &x210, x3, UINT32_C(0xfffffffe));
+  fiat_p256_mulx_u32(&x211, &x212, x3, UINT32_C(0xfffffffb));
+  fiat_p256_mulx_u32(&x213, &x214, x3, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x215, &x216, x3, 0x3);
+  fiat_p256_addcarryx_u32(&x217, &x218, 0x0, x214, x211);
+  fiat_p256_addcarryx_u32(&x219, &x220, x218, x212, x209);
+  fiat_p256_addcarryx_u32(&x221, &x222, x220, x210, x207);
+  fiat_p256_addcarryx_u32(&x223, &x224, x222, x208, x205);
+  fiat_p256_addcarryx_u32(&x225, &x226, x224, x206, x203);
+  fiat_p256_addcarryx_u32(&x227, &x228, 0x0, x187, x215);
+  fiat_p256_addcarryx_u32(&x229, &x230, x228, x189, x216);
+  fiat_p256_addcarryx_u32(&x231, &x232, x230, x191, x213);
+  fiat_p256_addcarryx_u32(&x233, &x234, x232, x193, x217);
+  fiat_p256_addcarryx_u32(&x235, &x236, x234, x195, x219);
+  fiat_p256_addcarryx_u32(&x237, &x238, x236, x197, x221);
+  fiat_p256_addcarryx_u32(&x239, &x240, x238, x199, x223);
+  fiat_p256_addcarryx_u32(&x241, &x242, x240, x201, x225);
+  fiat_p256_mulx_u32(&x243, &x244, x227, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x245, &x246, x227, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x247, &x248, x227, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x249, &x250, x227, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x251, &x252, 0x0, x250, x247);
+  fiat_p256_addcarryx_u32(&x253, &x254, x252, x248, x245);
+  fiat_p256_addcarryx_u32(&x255, &x256, 0x0, x227, x249);
+  fiat_p256_addcarryx_u32(&x257, &x258, x256, x229, x251);
+  fiat_p256_addcarryx_u32(&x259, &x260, x258, x231, x253);
+  fiat_p256_addcarryx_u32(&x261, &x262, x260, x233, (x254 + x246));
+  fiat_p256_addcarryx_u32(&x263, &x264, x262, x235, 0x0);
+  fiat_p256_addcarryx_u32(&x265, &x266, x264, x237, 0x0);
+  fiat_p256_addcarryx_u32(&x267, &x268, x266, x239, x227);
+  fiat_p256_addcarryx_u32(&x269, &x270, x268, x241, x243);
+  fiat_p256_addcarryx_u32(&x271, &x272, x270, (((uint32_t)x242 + x202) + (x226 + x204)), x244);
+  fiat_p256_mulx_u32(&x273, &x274, x4, 0x4);
+  fiat_p256_mulx_u32(&x275, &x276, x4, UINT32_C(0xfffffffd));
+  fiat_p256_mulx_u32(&x277, &x278, x4, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x279, &x280, x4, UINT32_C(0xfffffffe));
+  fiat_p256_mulx_u32(&x281, &x282, x4, UINT32_C(0xfffffffb));
+  fiat_p256_mulx_u32(&x283, &x284, x4, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x285, &x286, x4, 0x3);
+  fiat_p256_addcarryx_u32(&x287, &x288, 0x0, x284, x281);
+  fiat_p256_addcarryx_u32(&x289, &x290, x288, x282, x279);
+  fiat_p256_addcarryx_u32(&x291, &x292, x290, x280, x277);
+  fiat_p256_addcarryx_u32(&x293, &x294, x292, x278, x275);
+  fiat_p256_addcarryx_u32(&x295, &x296, x294, x276, x273);
+  fiat_p256_addcarryx_u32(&x297, &x298, 0x0, x257, x285);
+  fiat_p256_addcarryx_u32(&x299, &x300, x298, x259, x286);
+  fiat_p256_addcarryx_u32(&x301, &x302, x300, x261, x283);
+  fiat_p256_addcarryx_u32(&x303, &x304, x302, x263, x287);
+  fiat_p256_addcarryx_u32(&x305, &x306, x304, x265, x289);
+  fiat_p256_addcarryx_u32(&x307, &x308, x306, x267, x291);
+  fiat_p256_addcarryx_u32(&x309, &x310, x308, x269, x293);
+  fiat_p256_addcarryx_u32(&x311, &x312, x310, x271, x295);
+  fiat_p256_mulx_u32(&x313, &x314, x297, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x315, &x316, x297, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x317, &x318, x297, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x319, &x320, x297, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x321, &x322, 0x0, x320, x317);
+  fiat_p256_addcarryx_u32(&x323, &x324, x322, x318, x315);
+  fiat_p256_addcarryx_u32(&x325, &x326, 0x0, x297, x319);
+  fiat_p256_addcarryx_u32(&x327, &x328, x326, x299, x321);
+  fiat_p256_addcarryx_u32(&x329, &x330, x328, x301, x323);
+  fiat_p256_addcarryx_u32(&x331, &x332, x330, x303, (x324 + x316));
+  fiat_p256_addcarryx_u32(&x333, &x334, x332, x305, 0x0);
+  fiat_p256_addcarryx_u32(&x335, &x336, x334, x307, 0x0);
+  fiat_p256_addcarryx_u32(&x337, &x338, x336, x309, x297);
+  fiat_p256_addcarryx_u32(&x339, &x340, x338, x311, x313);
+  fiat_p256_addcarryx_u32(&x341, &x342, x340, (((uint32_t)x312 + x272) + (x296 + x274)), x314);
+  fiat_p256_mulx_u32(&x343, &x344, x5, 0x4);
+  fiat_p256_mulx_u32(&x345, &x346, x5, UINT32_C(0xfffffffd));
+  fiat_p256_mulx_u32(&x347, &x348, x5, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x349, &x350, x5, UINT32_C(0xfffffffe));
+  fiat_p256_mulx_u32(&x351, &x352, x5, UINT32_C(0xfffffffb));
+  fiat_p256_mulx_u32(&x353, &x354, x5, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x355, &x356, x5, 0x3);
+  fiat_p256_addcarryx_u32(&x357, &x358, 0x0, x354, x351);
+  fiat_p256_addcarryx_u32(&x359, &x360, x358, x352, x349);
+  fiat_p256_addcarryx_u32(&x361, &x362, x360, x350, x347);
+  fiat_p256_addcarryx_u32(&x363, &x364, x362, x348, x345);
+  fiat_p256_addcarryx_u32(&x365, &x366, x364, x346, x343);
+  fiat_p256_addcarryx_u32(&x367, &x368, 0x0, x327, x355);
+  fiat_p256_addcarryx_u32(&x369, &x370, x368, x329, x356);
+  fiat_p256_addcarryx_u32(&x371, &x372, x370, x331, x353);
+  fiat_p256_addcarryx_u32(&x373, &x374, x372, x333, x357);
+  fiat_p256_addcarryx_u32(&x375, &x376, x374, x335, x359);
+  fiat_p256_addcarryx_u32(&x377, &x378, x376, x337, x361);
+  fiat_p256_addcarryx_u32(&x379, &x380, x378, x339, x363);
+  fiat_p256_addcarryx_u32(&x381, &x382, x380, x341, x365);
+  fiat_p256_mulx_u32(&x383, &x384, x367, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x385, &x386, x367, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x387, &x388, x367, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x389, &x390, x367, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x391, &x392, 0x0, x390, x387);
+  fiat_p256_addcarryx_u32(&x393, &x394, x392, x388, x385);
+  fiat_p256_addcarryx_u32(&x395, &x396, 0x0, x367, x389);
+  fiat_p256_addcarryx_u32(&x397, &x398, x396, x369, x391);
+  fiat_p256_addcarryx_u32(&x399, &x400, x398, x371, x393);
+  fiat_p256_addcarryx_u32(&x401, &x402, x400, x373, (x394 + x386));
+  fiat_p256_addcarryx_u32(&x403, &x404, x402, x375, 0x0);
+  fiat_p256_addcarryx_u32(&x405, &x406, x404, x377, 0x0);
+  fiat_p256_addcarryx_u32(&x407, &x408, x406, x379, x367);
+  fiat_p256_addcarryx_u32(&x409, &x410, x408, x381, x383);
+  fiat_p256_addcarryx_u32(&x411, &x412, x410, (((uint32_t)x382 + x342) + (x366 + x344)), x384);
+  fiat_p256_mulx_u32(&x413, &x414, x6, 0x4);
+  fiat_p256_mulx_u32(&x415, &x416, x6, UINT32_C(0xfffffffd));
+  fiat_p256_mulx_u32(&x417, &x418, x6, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x419, &x420, x6, UINT32_C(0xfffffffe));
+  fiat_p256_mulx_u32(&x421, &x422, x6, UINT32_C(0xfffffffb));
+  fiat_p256_mulx_u32(&x423, &x424, x6, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x425, &x426, x6, 0x3);
+  fiat_p256_addcarryx_u32(&x427, &x428, 0x0, x424, x421);
+  fiat_p256_addcarryx_u32(&x429, &x430, x428, x422, x419);
+  fiat_p256_addcarryx_u32(&x431, &x432, x430, x420, x417);
+  fiat_p256_addcarryx_u32(&x433, &x434, x432, x418, x415);
+  fiat_p256_addcarryx_u32(&x435, &x436, x434, x416, x413);
+  fiat_p256_addcarryx_u32(&x437, &x438, 0x0, x397, x425);
+  fiat_p256_addcarryx_u32(&x439, &x440, x438, x399, x426);
+  fiat_p256_addcarryx_u32(&x441, &x442, x440, x401, x423);
+  fiat_p256_addcarryx_u32(&x443, &x444, x442, x403, x427);
+  fiat_p256_addcarryx_u32(&x445, &x446, x444, x405, x429);
+  fiat_p256_addcarryx_u32(&x447, &x448, x446, x407, x431);
+  fiat_p256_addcarryx_u32(&x449, &x450, x448, x409, x433);
+  fiat_p256_addcarryx_u32(&x451, &x452, x450, x411, x435);
+  fiat_p256_mulx_u32(&x453, &x454, x437, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x455, &x456, x437, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x457, &x458, x437, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x459, &x460, x437, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x461, &x462, 0x0, x460, x457);
+  fiat_p256_addcarryx_u32(&x463, &x464, x462, x458, x455);
+  fiat_p256_addcarryx_u32(&x465, &x466, 0x0, x437, x459);
+  fiat_p256_addcarryx_u32(&x467, &x468, x466, x439, x461);
+  fiat_p256_addcarryx_u32(&x469, &x470, x468, x441, x463);
+  fiat_p256_addcarryx_u32(&x471, &x472, x470, x443, (x464 + x456));
+  fiat_p256_addcarryx_u32(&x473, &x474, x472, x445, 0x0);
+  fiat_p256_addcarryx_u32(&x475, &x476, x474, x447, 0x0);
+  fiat_p256_addcarryx_u32(&x477, &x478, x476, x449, x437);
+  fiat_p256_addcarryx_u32(&x479, &x480, x478, x451, x453);
+  fiat_p256_addcarryx_u32(&x481, &x482, x480, (((uint32_t)x452 + x412) + (x436 + x414)), x454);
+  fiat_p256_mulx_u32(&x483, &x484, x7, 0x4);
+  fiat_p256_mulx_u32(&x485, &x486, x7, UINT32_C(0xfffffffd));
+  fiat_p256_mulx_u32(&x487, &x488, x7, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x489, &x490, x7, UINT32_C(0xfffffffe));
+  fiat_p256_mulx_u32(&x491, &x492, x7, UINT32_C(0xfffffffb));
+  fiat_p256_mulx_u32(&x493, &x494, x7, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x495, &x496, x7, 0x3);
+  fiat_p256_addcarryx_u32(&x497, &x498, 0x0, x494, x491);
+  fiat_p256_addcarryx_u32(&x499, &x500, x498, x492, x489);
+  fiat_p256_addcarryx_u32(&x501, &x502, x500, x490, x487);
+  fiat_p256_addcarryx_u32(&x503, &x504, x502, x488, x485);
+  fiat_p256_addcarryx_u32(&x505, &x506, x504, x486, x483);
+  fiat_p256_addcarryx_u32(&x507, &x508, 0x0, x467, x495);
+  fiat_p256_addcarryx_u32(&x509, &x510, x508, x469, x496);
+  fiat_p256_addcarryx_u32(&x511, &x512, x510, x471, x493);
+  fiat_p256_addcarryx_u32(&x513, &x514, x512, x473, x497);
+  fiat_p256_addcarryx_u32(&x515, &x516, x514, x475, x499);
+  fiat_p256_addcarryx_u32(&x517, &x518, x516, x477, x501);
+  fiat_p256_addcarryx_u32(&x519, &x520, x518, x479, x503);
+  fiat_p256_addcarryx_u32(&x521, &x522, x520, x481, x505);
+  fiat_p256_mulx_u32(&x523, &x524, x507, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x525, &x526, x507, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x527, &x528, x507, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u32(&x529, &x530, x507, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x531, &x532, 0x0, x530, x527);
+  fiat_p256_addcarryx_u32(&x533, &x534, x532, x528, x525);
+  fiat_p256_addcarryx_u32(&x535, &x536, 0x0, x507, x529);
+  fiat_p256_addcarryx_u32(&x537, &x538, x536, x509, x531);
+  fiat_p256_addcarryx_u32(&x539, &x540, x538, x511, x533);
+  fiat_p256_addcarryx_u32(&x541, &x542, x540, x513, (x534 + x526));
+  fiat_p256_addcarryx_u32(&x543, &x544, x542, x515, 0x0);
+  fiat_p256_addcarryx_u32(&x545, &x546, x544, x517, 0x0);
+  fiat_p256_addcarryx_u32(&x547, &x548, x546, x519, x507);
+  fiat_p256_addcarryx_u32(&x549, &x550, x548, x521, x523);
+  fiat_p256_addcarryx_u32(&x551, &x552, x550, (((uint32_t)x522 + x482) + (x506 + x484)), x524);
+  fiat_p256_subborrowx_u32(&x553, &x554, 0x0, x537, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x555, &x556, x554, x539, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x557, &x558, x556, x541, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x559, &x560, x558, x543, 0x0);
+  fiat_p256_subborrowx_u32(&x561, &x562, x560, x545, 0x0);
+  fiat_p256_subborrowx_u32(&x563, &x564, x562, x547, 0x0);
+  fiat_p256_subborrowx_u32(&x565, &x566, x564, x549, 0x1);
+  fiat_p256_subborrowx_u32(&x567, &x568, x566, x551, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x569, &x570, x568, x552, 0x0);
+  fiat_p256_cmovznz_u32(&x571, x570, x553, x537);
+  fiat_p256_cmovznz_u32(&x572, x570, x555, x539);
+  fiat_p256_cmovznz_u32(&x573, x570, x557, x541);
+  fiat_p256_cmovznz_u32(&x574, x570, x559, x543);
+  fiat_p256_cmovznz_u32(&x575, x570, x561, x545);
+  fiat_p256_cmovznz_u32(&x576, x570, x563, x547);
+  fiat_p256_cmovznz_u32(&x577, x570, x565, x549);
+  fiat_p256_cmovznz_u32(&x578, x570, x567, x551);
+  out1[0] = x571;
+  out1[1] = x572;
+  out1[2] = x573;
+  out1[3] = x574;
+  out1[4] = x575;
+  out1[5] = x576;
+  out1[6] = x577;
+  out1[7] = x578;
+}
+
+/*
  * The function fiat_p256_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  * Postconditions:
@@ -2920,13 +3897,15 @@
  * Output Bounds:
  *   out1: [0x0 ~> 0xffffffff]
  */
-static void fiat_p256_nonzero(uint32_t* out1, const uint32_t arg1[8]) {
-  uint32_t x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | ((arg1[3]) | ((arg1[4]) | ((arg1[5]) | ((arg1[6]) | ((arg1[7]) | (uint32_t)0x0))))))));
+static FIAT_P256_FIAT_INLINE void fiat_p256_nonzero(uint32_t* out1, const uint32_t arg1[8]) { /* writes the OR-reduction of the eight limbs of arg1 to *out1 */
+  uint32_t x1; /* accumulates the bitwise OR of arg1[0..7] */
+  x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | ((arg1[3]) | ((arg1[4]) | ((arg1[5]) | ((arg1[6]) | (arg1[7])))))))); /* zero only when every limb is zero; branch-free */
   *out1 = x1;
 }
 
 /*
  * The function fiat_p256_selectznz is a multi-limb conditional select.
+ *
  * Postconditions:
  *   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
  *
@@ -2937,22 +3916,22 @@
  * Output Bounds:
  *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
  */
-static void fiat_p256_selectznz(uint32_t out1[8], fiat_p256_uint1 arg1, const uint32_t arg2[8], const uint32_t arg3[8]) {
+static FIAT_P256_FIAT_INLINE void fiat_p256_selectznz(uint32_t out1[8], fiat_p256_uint1 arg1, const uint32_t arg2[8], const uint32_t arg3[8]) {
   uint32_t x1;
-  fiat_p256_cmovznz_u32(&x1, arg1, (arg2[0]), (arg3[0]));
   uint32_t x2;
-  fiat_p256_cmovznz_u32(&x2, arg1, (arg2[1]), (arg3[1]));
   uint32_t x3;
-  fiat_p256_cmovznz_u32(&x3, arg1, (arg2[2]), (arg3[2]));
   uint32_t x4;
-  fiat_p256_cmovznz_u32(&x4, arg1, (arg2[3]), (arg3[3]));
   uint32_t x5;
-  fiat_p256_cmovznz_u32(&x5, arg1, (arg2[4]), (arg3[4]));
   uint32_t x6;
-  fiat_p256_cmovznz_u32(&x6, arg1, (arg2[5]), (arg3[5]));
   uint32_t x7;
-  fiat_p256_cmovznz_u32(&x7, arg1, (arg2[6]), (arg3[6]));
   uint32_t x8;
+  fiat_p256_cmovznz_u32(&x1, arg1, (arg2[0]), (arg3[0]));
+  fiat_p256_cmovznz_u32(&x2, arg1, (arg2[1]), (arg3[1]));
+  fiat_p256_cmovznz_u32(&x3, arg1, (arg2[2]), (arg3[2]));
+  fiat_p256_cmovznz_u32(&x4, arg1, (arg2[3]), (arg3[3]));
+  fiat_p256_cmovznz_u32(&x5, arg1, (arg2[4]), (arg3[4]));
+  fiat_p256_cmovznz_u32(&x6, arg1, (arg2[5]), (arg3[5]));
+  fiat_p256_cmovznz_u32(&x7, arg1, (arg2[6]), (arg3[6]));
   fiat_p256_cmovznz_u32(&x8, arg1, (arg2[7]), (arg3[7]));
   out1[0] = x1;
   out1[1] = x2;
@@ -2965,7 +3944,8 @@
 }
 
 /*
- * The function fiat_p256_to_bytes serializes a field element in the Montgomery domain to bytes in little-endian order.
+ * The function fiat_p256_to_bytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  * Postconditions:
@@ -2976,106 +3956,156 @@
  * Output Bounds:
  *   out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
  */
-static void fiat_p256_to_bytes(uint8_t out1[32], const uint32_t arg1[8]) {
-  uint32_t x1 = (arg1[7]);
-  uint32_t x2 = (arg1[6]);
-  uint32_t x3 = (arg1[5]);
-  uint32_t x4 = (arg1[4]);
-  uint32_t x5 = (arg1[3]);
-  uint32_t x6 = (arg1[2]);
-  uint32_t x7 = (arg1[1]);
-  uint32_t x8 = (arg1[0]);
-  uint32_t x9 = (x8 >> 8);
-  uint8_t x10 = (uint8_t)(x8 & UINT8_C(0xff));
-  uint32_t x11 = (x9 >> 8);
-  uint8_t x12 = (uint8_t)(x9 & UINT8_C(0xff));
-  uint8_t x13 = (uint8_t)(x11 >> 8);
-  uint8_t x14 = (uint8_t)(x11 & UINT8_C(0xff));
-  uint8_t x15 = (uint8_t)(x13 & UINT8_C(0xff));
-  uint32_t x16 = (x7 >> 8);
-  uint8_t x17 = (uint8_t)(x7 & UINT8_C(0xff));
-  uint32_t x18 = (x16 >> 8);
-  uint8_t x19 = (uint8_t)(x16 & UINT8_C(0xff));
-  uint8_t x20 = (uint8_t)(x18 >> 8);
-  uint8_t x21 = (uint8_t)(x18 & UINT8_C(0xff));
-  uint8_t x22 = (uint8_t)(x20 & UINT8_C(0xff));
-  uint32_t x23 = (x6 >> 8);
-  uint8_t x24 = (uint8_t)(x6 & UINT8_C(0xff));
-  uint32_t x25 = (x23 >> 8);
-  uint8_t x26 = (uint8_t)(x23 & UINT8_C(0xff));
-  uint8_t x27 = (uint8_t)(x25 >> 8);
-  uint8_t x28 = (uint8_t)(x25 & UINT8_C(0xff));
-  uint8_t x29 = (uint8_t)(x27 & UINT8_C(0xff));
-  uint32_t x30 = (x5 >> 8);
-  uint8_t x31 = (uint8_t)(x5 & UINT8_C(0xff));
-  uint32_t x32 = (x30 >> 8);
-  uint8_t x33 = (uint8_t)(x30 & UINT8_C(0xff));
-  uint8_t x34 = (uint8_t)(x32 >> 8);
-  uint8_t x35 = (uint8_t)(x32 & UINT8_C(0xff));
-  uint8_t x36 = (uint8_t)(x34 & UINT8_C(0xff));
-  uint32_t x37 = (x4 >> 8);
-  uint8_t x38 = (uint8_t)(x4 & UINT8_C(0xff));
-  uint32_t x39 = (x37 >> 8);
-  uint8_t x40 = (uint8_t)(x37 & UINT8_C(0xff));
-  uint8_t x41 = (uint8_t)(x39 >> 8);
-  uint8_t x42 = (uint8_t)(x39 & UINT8_C(0xff));
-  uint8_t x43 = (uint8_t)(x41 & UINT8_C(0xff));
-  uint32_t x44 = (x3 >> 8);
-  uint8_t x45 = (uint8_t)(x3 & UINT8_C(0xff));
-  uint32_t x46 = (x44 >> 8);
-  uint8_t x47 = (uint8_t)(x44 & UINT8_C(0xff));
-  uint8_t x48 = (uint8_t)(x46 >> 8);
-  uint8_t x49 = (uint8_t)(x46 & UINT8_C(0xff));
-  uint8_t x50 = (uint8_t)(x48 & UINT8_C(0xff));
-  uint32_t x51 = (x2 >> 8);
-  uint8_t x52 = (uint8_t)(x2 & UINT8_C(0xff));
-  uint32_t x53 = (x51 >> 8);
-  uint8_t x54 = (uint8_t)(x51 & UINT8_C(0xff));
-  uint8_t x55 = (uint8_t)(x53 >> 8);
-  uint8_t x56 = (uint8_t)(x53 & UINT8_C(0xff));
-  uint8_t x57 = (uint8_t)(x55 & UINT8_C(0xff));
-  uint32_t x58 = (x1 >> 8);
-  uint8_t x59 = (uint8_t)(x1 & UINT8_C(0xff));
-  uint32_t x60 = (x58 >> 8);
-  uint8_t x61 = (uint8_t)(x58 & UINT8_C(0xff));
-  uint8_t x62 = (uint8_t)(x60 >> 8);
-  uint8_t x63 = (uint8_t)(x60 & UINT8_C(0xff));
-  out1[0] = x10;
-  out1[1] = x12;
-  out1[2] = x14;
-  out1[3] = x15;
-  out1[4] = x17;
-  out1[5] = x19;
-  out1[6] = x21;
-  out1[7] = x22;
-  out1[8] = x24;
-  out1[9] = x26;
-  out1[10] = x28;
-  out1[11] = x29;
-  out1[12] = x31;
-  out1[13] = x33;
-  out1[14] = x35;
-  out1[15] = x36;
-  out1[16] = x38;
-  out1[17] = x40;
-  out1[18] = x42;
-  out1[19] = x43;
-  out1[20] = x45;
-  out1[21] = x47;
-  out1[22] = x49;
-  out1[23] = x50;
-  out1[24] = x52;
-  out1[25] = x54;
-  out1[26] = x56;
-  out1[27] = x57;
-  out1[28] = x59;
-  out1[29] = x61;
-  out1[30] = x63;
-  out1[31] = x62;
+static FIAT_P256_FIAT_INLINE void fiat_p256_to_bytes(uint8_t out1[32], const uint32_t arg1[8]) { /* splits eight 32-bit limbs into 32 bytes, lowest byte of arg1[0] first */
+  uint32_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  uint32_t x4;
+  uint32_t x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint32_t x8;
+  uint8_t x9;
+  uint32_t x10;
+  uint8_t x11;
+  uint32_t x12;
+  uint8_t x13;
+  uint8_t x14;
+  uint8_t x15;
+  uint32_t x16;
+  uint8_t x17;
+  uint32_t x18;
+  uint8_t x19;
+  uint8_t x20;
+  uint8_t x21;
+  uint32_t x22;
+  uint8_t x23;
+  uint32_t x24;
+  uint8_t x25;
+  uint8_t x26;
+  uint8_t x27;
+  uint32_t x28;
+  uint8_t x29;
+  uint32_t x30;
+  uint8_t x31;
+  uint8_t x32;
+  uint8_t x33;
+  uint32_t x34;
+  uint8_t x35;
+  uint32_t x36;
+  uint8_t x37;
+  uint8_t x38;
+  uint8_t x39;
+  uint32_t x40;
+  uint8_t x41;
+  uint32_t x42;
+  uint8_t x43;
+  uint8_t x44;
+  uint8_t x45;
+  uint32_t x46;
+  uint8_t x47;
+  uint32_t x48;
+  uint8_t x49;
+  uint8_t x50;
+  uint8_t x51;
+  uint32_t x52;
+  uint8_t x53;
+  uint32_t x54;
+  uint8_t x55;
+  uint8_t x56;
+  x1 = (arg1[7]); /* limb 7: its bytes are stored to out1[28..31] */
+  x2 = (arg1[6]); /* limb 6: out1[24..27] */
+  x3 = (arg1[5]); /* limb 5: out1[20..23] */
+  x4 = (arg1[4]); /* limb 4: out1[16..19] */
+  x5 = (arg1[3]); /* limb 3: out1[12..15] */
+  x6 = (arg1[2]); /* limb 2: out1[8..11] */
+  x7 = (arg1[1]); /* limb 1: out1[4..7] */
+  x8 = (arg1[0]); /* limb 0: out1[0..3] */
+  x9 = (uint8_t)(x8 & UINT8_C(0xff)); /* limb 0, bits 0-7 */
+  x10 = (x8 >> 8);
+  x11 = (uint8_t)(x10 & UINT8_C(0xff)); /* limb 0, bits 8-15 */
+  x12 = (x10 >> 8);
+  x13 = (uint8_t)(x12 & UINT8_C(0xff)); /* limb 0, bits 16-23 */
+  x14 = (uint8_t)(x12 >> 8); /* limb 0, bits 24-31; after a total shift of 24 only 8 bits remain, so no mask */
+  x15 = (uint8_t)(x7 & UINT8_C(0xff)); /* limb 1: same mask/shift pattern as limb 0 */
+  x16 = (x7 >> 8);
+  x17 = (uint8_t)(x16 & UINT8_C(0xff));
+  x18 = (x16 >> 8);
+  x19 = (uint8_t)(x18 & UINT8_C(0xff));
+  x20 = (uint8_t)(x18 >> 8);
+  x21 = (uint8_t)(x6 & UINT8_C(0xff)); /* limb 2 */
+  x22 = (x6 >> 8);
+  x23 = (uint8_t)(x22 & UINT8_C(0xff));
+  x24 = (x22 >> 8);
+  x25 = (uint8_t)(x24 & UINT8_C(0xff));
+  x26 = (uint8_t)(x24 >> 8);
+  x27 = (uint8_t)(x5 & UINT8_C(0xff)); /* limb 3 */
+  x28 = (x5 >> 8);
+  x29 = (uint8_t)(x28 & UINT8_C(0xff));
+  x30 = (x28 >> 8);
+  x31 = (uint8_t)(x30 & UINT8_C(0xff));
+  x32 = (uint8_t)(x30 >> 8);
+  x33 = (uint8_t)(x4 & UINT8_C(0xff)); /* limb 4 */
+  x34 = (x4 >> 8);
+  x35 = (uint8_t)(x34 & UINT8_C(0xff));
+  x36 = (x34 >> 8);
+  x37 = (uint8_t)(x36 & UINT8_C(0xff));
+  x38 = (uint8_t)(x36 >> 8);
+  x39 = (uint8_t)(x3 & UINT8_C(0xff)); /* limb 5 */
+  x40 = (x3 >> 8);
+  x41 = (uint8_t)(x40 & UINT8_C(0xff));
+  x42 = (x40 >> 8);
+  x43 = (uint8_t)(x42 & UINT8_C(0xff));
+  x44 = (uint8_t)(x42 >> 8);
+  x45 = (uint8_t)(x2 & UINT8_C(0xff)); /* limb 6 */
+  x46 = (x2 >> 8);
+  x47 = (uint8_t)(x46 & UINT8_C(0xff));
+  x48 = (x46 >> 8);
+  x49 = (uint8_t)(x48 & UINT8_C(0xff));
+  x50 = (uint8_t)(x48 >> 8);
+  x51 = (uint8_t)(x1 & UINT8_C(0xff)); /* limb 7 */
+  x52 = (x1 >> 8);
+  x53 = (uint8_t)(x52 & UINT8_C(0xff));
+  x54 = (x52 >> 8);
+  x55 = (uint8_t)(x54 & UINT8_C(0xff));
+  x56 = (uint8_t)(x54 >> 8);
+  out1[0] = x9; /* bytes of arg1[0], low byte first */
+  out1[1] = x11;
+  out1[2] = x13;
+  out1[3] = x14;
+  out1[4] = x15; /* bytes of arg1[1] */
+  out1[5] = x17;
+  out1[6] = x19;
+  out1[7] = x20;
+  out1[8] = x21; /* bytes of arg1[2] */
+  out1[9] = x23;
+  out1[10] = x25;
+  out1[11] = x26;
+  out1[12] = x27; /* bytes of arg1[3] */
+  out1[13] = x29;
+  out1[14] = x31;
+  out1[15] = x32;
+  out1[16] = x33; /* bytes of arg1[4] */
+  out1[17] = x35;
+  out1[18] = x37;
+  out1[19] = x38;
+  out1[20] = x39; /* bytes of arg1[5] */
+  out1[21] = x41;
+  out1[22] = x43;
+  out1[23] = x44;
+  out1[24] = x45; /* bytes of arg1[6] */
+  out1[25] = x47;
+  out1[26] = x49;
+  out1[27] = x50;
+  out1[28] = x51; /* bytes of arg1[7] */
+  out1[29] = x53;
+  out1[30] = x55;
+  out1[31] = x56;
 }
 
 /*
- * The function fiat_p256_from_bytes deserializes a field element in the Montgomery domain from bytes in little-endian order.
+ * The function fiat_p256_from_bytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order.
+ *
  * Preconditions:
  *   0 ≤ bytes_eval arg1 < m
  * Postconditions:
@@ -3087,61 +4117,644 @@
  * Output Bounds:
  *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
  */
-static void fiat_p256_from_bytes(uint32_t out1[8], const uint8_t arg1[32]) {
-  uint32_t x1 = ((uint32_t)(arg1[31]) << 24);
-  uint32_t x2 = ((uint32_t)(arg1[30]) << 16);
-  uint32_t x3 = ((uint32_t)(arg1[29]) << 8);
-  uint8_t x4 = (arg1[28]);
-  uint32_t x5 = ((uint32_t)(arg1[27]) << 24);
-  uint32_t x6 = ((uint32_t)(arg1[26]) << 16);
-  uint32_t x7 = ((uint32_t)(arg1[25]) << 8);
-  uint8_t x8 = (arg1[24]);
-  uint32_t x9 = ((uint32_t)(arg1[23]) << 24);
-  uint32_t x10 = ((uint32_t)(arg1[22]) << 16);
-  uint32_t x11 = ((uint32_t)(arg1[21]) << 8);
-  uint8_t x12 = (arg1[20]);
-  uint32_t x13 = ((uint32_t)(arg1[19]) << 24);
-  uint32_t x14 = ((uint32_t)(arg1[18]) << 16);
-  uint32_t x15 = ((uint32_t)(arg1[17]) << 8);
-  uint8_t x16 = (arg1[16]);
-  uint32_t x17 = ((uint32_t)(arg1[15]) << 24);
-  uint32_t x18 = ((uint32_t)(arg1[14]) << 16);
-  uint32_t x19 = ((uint32_t)(arg1[13]) << 8);
-  uint8_t x20 = (arg1[12]);
-  uint32_t x21 = ((uint32_t)(arg1[11]) << 24);
-  uint32_t x22 = ((uint32_t)(arg1[10]) << 16);
-  uint32_t x23 = ((uint32_t)(arg1[9]) << 8);
-  uint8_t x24 = (arg1[8]);
-  uint32_t x25 = ((uint32_t)(arg1[7]) << 24);
-  uint32_t x26 = ((uint32_t)(arg1[6]) << 16);
-  uint32_t x27 = ((uint32_t)(arg1[5]) << 8);
-  uint8_t x28 = (arg1[4]);
-  uint32_t x29 = ((uint32_t)(arg1[3]) << 24);
-  uint32_t x30 = ((uint32_t)(arg1[2]) << 16);
-  uint32_t x31 = ((uint32_t)(arg1[1]) << 8);
-  uint8_t x32 = (arg1[0]);
-  uint32_t x33 = (x32 + (x31 + (x30 + x29)));
-  uint32_t x34 = (x33 & UINT32_C(0xffffffff));
-  uint32_t x35 = (x4 + (x3 + (x2 + x1)));
-  uint32_t x36 = (x8 + (x7 + (x6 + x5)));
-  uint32_t x37 = (x12 + (x11 + (x10 + x9)));
-  uint32_t x38 = (x16 + (x15 + (x14 + x13)));
-  uint32_t x39 = (x20 + (x19 + (x18 + x17)));
-  uint32_t x40 = (x24 + (x23 + (x22 + x21)));
-  uint32_t x41 = (x28 + (x27 + (x26 + x25)));
-  uint32_t x42 = (x41 & UINT32_C(0xffffffff));
-  uint32_t x43 = (x40 & UINT32_C(0xffffffff));
-  uint32_t x44 = (x39 & UINT32_C(0xffffffff));
-  uint32_t x45 = (x38 & UINT32_C(0xffffffff));
-  uint32_t x46 = (x37 & UINT32_C(0xffffffff));
-  uint32_t x47 = (x36 & UINT32_C(0xffffffff));
-  out1[0] = x34;
-  out1[1] = x42;
-  out1[2] = x43;
+static FIAT_P256_FIAT_INLINE void fiat_p256_from_bytes(uint32_t out1[8], const uint8_t arg1[32]) {
+  uint32_t x1;
+  uint32_t x2;
+  uint32_t x3;
+  uint8_t x4;
+  uint32_t x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint8_t x8;
+  uint32_t x9;
+  uint32_t x10;
+  uint32_t x11;
+  uint8_t x12;
+  uint32_t x13;
+  uint32_t x14;
+  uint32_t x15;
+  uint8_t x16;
+  uint32_t x17;
+  uint32_t x18;
+  uint32_t x19;
+  uint8_t x20;
+  uint32_t x21;
+  uint32_t x22;
+  uint32_t x23;
+  uint8_t x24;
+  uint32_t x25;
+  uint32_t x26;
+  uint32_t x27;
+  uint8_t x28;
+  uint32_t x29;
+  uint32_t x30;
+  uint32_t x31;
+  uint8_t x32;
+  uint32_t x33;
+  uint32_t x34;
+  uint32_t x35;
+  uint32_t x36;
+  uint32_t x37;
+  uint32_t x38;
+  uint32_t x39;
+  uint32_t x40;
+  uint32_t x41;
+  uint32_t x42;
+  uint32_t x43;
+  uint32_t x44;
+  uint32_t x45;
+  uint32_t x46;
+  uint32_t x47;
+  uint32_t x48;
+  uint32_t x49;
+  uint32_t x50;
+  uint32_t x51;
+  uint32_t x52;
+  uint32_t x53;
+  uint32_t x54;
+  uint32_t x55;
+  uint32_t x56;
+  x1 = ((uint32_t)(arg1[31]) << 24);
+  x2 = ((uint32_t)(arg1[30]) << 16);
+  x3 = ((uint32_t)(arg1[29]) << 8);
+  x4 = (arg1[28]);
+  x5 = ((uint32_t)(arg1[27]) << 24);
+  x6 = ((uint32_t)(arg1[26]) << 16);
+  x7 = ((uint32_t)(arg1[25]) << 8);
+  x8 = (arg1[24]);
+  x9 = ((uint32_t)(arg1[23]) << 24);
+  x10 = ((uint32_t)(arg1[22]) << 16);
+  x11 = ((uint32_t)(arg1[21]) << 8);
+  x12 = (arg1[20]);
+  x13 = ((uint32_t)(arg1[19]) << 24);
+  x14 = ((uint32_t)(arg1[18]) << 16);
+  x15 = ((uint32_t)(arg1[17]) << 8);
+  x16 = (arg1[16]);
+  x17 = ((uint32_t)(arg1[15]) << 24);
+  x18 = ((uint32_t)(arg1[14]) << 16);
+  x19 = ((uint32_t)(arg1[13]) << 8);
+  x20 = (arg1[12]);
+  x21 = ((uint32_t)(arg1[11]) << 24);
+  x22 = ((uint32_t)(arg1[10]) << 16);
+  x23 = ((uint32_t)(arg1[9]) << 8);
+  x24 = (arg1[8]);
+  x25 = ((uint32_t)(arg1[7]) << 24);
+  x26 = ((uint32_t)(arg1[6]) << 16);
+  x27 = ((uint32_t)(arg1[5]) << 8);
+  x28 = (arg1[4]);
+  x29 = ((uint32_t)(arg1[3]) << 24);
+  x30 = ((uint32_t)(arg1[2]) << 16);
+  x31 = ((uint32_t)(arg1[1]) << 8);
+  x32 = (arg1[0]);
+  x33 = (x31 + (uint32_t)x32);
+  x34 = (x30 + x33);
+  x35 = (x29 + x34);
+  x36 = (x27 + (uint32_t)x28);
+  x37 = (x26 + x36);
+  x38 = (x25 + x37);
+  x39 = (x23 + (uint32_t)x24);
+  x40 = (x22 + x39);
+  x41 = (x21 + x40);
+  x42 = (x19 + (uint32_t)x20);
+  x43 = (x18 + x42);
+  x44 = (x17 + x43);
+  x45 = (x15 + (uint32_t)x16);
+  x46 = (x14 + x45);
+  x47 = (x13 + x46);
+  x48 = (x11 + (uint32_t)x12);
+  x49 = (x10 + x48);
+  x50 = (x9 + x49);
+  x51 = (x7 + (uint32_t)x8);
+  x52 = (x6 + x51);
+  x53 = (x5 + x52);
+  x54 = (x3 + (uint32_t)x4);
+  x55 = (x2 + x54);
+  x56 = (x1 + x55);
+  out1[0] = x35;
+  out1[1] = x38;
+  out1[2] = x41;
   out1[3] = x44;
-  out1[4] = x45;
-  out1[5] = x46;
-  out1[6] = x47;
-  out1[7] = x35;
+  out1[4] = x47;
+  out1[5] = x50;
+  out1[6] = x53;
+  out1[7] = x56;
 }
 
+/*
+ * The function fiat_p256_set_one returns the field element one in the Montgomery domain.
+ *
+ * Postconditions:
+ *   eval (from_montgomery out1) mod m = 1 mod m
+ *   0 ≤ eval out1 < m
+ * The eight limbs stored below are 2^256 mod m (= 2^224 - 2^192 - 2^96 + 1), least-significant limb first.
+ */
+static FIAT_P256_FIAT_INLINE void fiat_p256_set_one(fiat_p256_montgomery_domain_field_element out1) {
+  out1[0] = 0x1;
+  out1[1] = 0x0;
+  out1[2] = 0x0;
+  out1[3] = UINT32_C(0xffffffff);
+  out1[4] = UINT32_C(0xffffffff);
+  out1[5] = UINT32_C(0xffffffff);
+  out1[6] = UINT32_C(0xfffffffe);
+  out1[7] = 0x0;
+}
+
+/*
+ * The function fiat_p256_msat returns the saturated representation of the prime modulus.
+ *
+ * Postconditions:
+ *   twos_complement_eval out1 = m
+ *   out1[0..7] are the 32-bit limbs of m (least-significant first) and out1[8] = 0, so the value stored is m itself, not a residue < m
+ *
+ * Output Bounds:
+ *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ */
+static FIAT_P256_FIAT_INLINE void fiat_p256_msat(uint32_t out1[9]) {
+  out1[0] = UINT32_C(0xffffffff);
+  out1[1] = UINT32_C(0xffffffff);
+  out1[2] = UINT32_C(0xffffffff);
+  out1[3] = 0x0;
+  out1[4] = 0x0;
+  out1[5] = 0x0;
+  out1[6] = 0x1;
+  out1[7] = UINT32_C(0xffffffff);
+  out1[8] = 0x0;
+}
+
+/*
+ * The function fiat_p256_divstep computes a divstep.
+ *
+ * Preconditions:
+ *   0 ≤ eval arg4 < m
+ *   0 ≤ eval arg5 < m
+ * Postconditions:
+ *   out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1)
+ *   twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2)
+ *   twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋)
+ *   eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m)
+ *   eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg5) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m)
+ *   0 ≤ eval out4 < m
+ *   0 ≤ eval out5 < m
+ *   0 ≤ eval out2 < m
+ *   0 ≤ eval out3 < m
+ *
+ * Input Bounds:
+ *   arg1: [0x0 ~> 0xffffffff]
+ *   arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ *   arg3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ *   arg4: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ *   arg5: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * Output Bounds:
+ *   out1: [0x0 ~> 0xffffffff]
+ *   out2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ *   out3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ *   out4: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ *   out5: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ */
+static FIAT_P256_FIAT_INLINE void fiat_p256_divstep(uint32_t* out1, uint32_t out2[9], uint32_t out3[9], uint32_t out4[8], uint32_t out5[8], uint32_t arg1, const uint32_t arg2[9], const uint32_t arg3[9], const uint32_t arg4[8], const uint32_t arg5[8]) {
+  uint32_t x1;
+  fiat_p256_uint1 x2;
+  fiat_p256_uint1 x3;
+  uint32_t x4;
+  fiat_p256_uint1 x5;
+  uint32_t x6;
+  uint32_t x7;
+  uint32_t x8;
+  uint32_t x9;
+  uint32_t x10;
+  uint32_t x11;
+  uint32_t x12;
+  uint32_t x13;
+  uint32_t x14;
+  uint32_t x15;
+  uint32_t x16;
+  fiat_p256_uint1 x17;
+  uint32_t x18;
+  fiat_p256_uint1 x19;
+  uint32_t x20;
+  fiat_p256_uint1 x21;
+  uint32_t x22;
+  fiat_p256_uint1 x23;
+  uint32_t x24;
+  fiat_p256_uint1 x25;
+  uint32_t x26;
+  fiat_p256_uint1 x27;
+  uint32_t x28;
+  fiat_p256_uint1 x29;
+  uint32_t x30;
+  fiat_p256_uint1 x31;
+  uint32_t x32;
+  fiat_p256_uint1 x33;
+  uint32_t x34;
+  uint32_t x35;
+  uint32_t x36;
+  uint32_t x37;
+  uint32_t x38;
+  uint32_t x39;
+  uint32_t x40;
+  uint32_t x41;
+  uint32_t x42;
+  uint32_t x43;
+  uint32_t x44;
+  uint32_t x45;
+  uint32_t x46;
+  uint32_t x47;
+  uint32_t x48;
+  uint32_t x49;
+  uint32_t x50;
+  uint32_t x51;
+  fiat_p256_uint1 x52;
+  uint32_t x53;
+  fiat_p256_uint1 x54;
+  uint32_t x55;
+  fiat_p256_uint1 x56;
+  uint32_t x57;
+  fiat_p256_uint1 x58;
+  uint32_t x59;
+  fiat_p256_uint1 x60;
+  uint32_t x61;
+  fiat_p256_uint1 x62;
+  uint32_t x63;
+  fiat_p256_uint1 x64;
+  uint32_t x65;
+  fiat_p256_uint1 x66;
+  uint32_t x67;
+  fiat_p256_uint1 x68;
+  uint32_t x69;
+  fiat_p256_uint1 x70;
+  uint32_t x71;
+  fiat_p256_uint1 x72;
+  uint32_t x73;
+  fiat_p256_uint1 x74;
+  uint32_t x75;
+  fiat_p256_uint1 x76;
+  uint32_t x77;
+  fiat_p256_uint1 x78;
+  uint32_t x79;
+  fiat_p256_uint1 x80;
+  uint32_t x81;
+  fiat_p256_uint1 x82;
+  uint32_t x83;
+  fiat_p256_uint1 x84;
+  uint32_t x85;
+  uint32_t x86;
+  uint32_t x87;
+  uint32_t x88;
+  uint32_t x89;
+  uint32_t x90;
+  uint32_t x91;
+  uint32_t x92;
+  uint32_t x93;
+  fiat_p256_uint1 x94;
+  uint32_t x95;
+  fiat_p256_uint1 x96;
+  uint32_t x97;
+  fiat_p256_uint1 x98;
+  uint32_t x99;
+  fiat_p256_uint1 x100;
+  uint32_t x101;
+  fiat_p256_uint1 x102;
+  uint32_t x103;
+  fiat_p256_uint1 x104;
+  uint32_t x105;
+  fiat_p256_uint1 x106;
+  uint32_t x107;
+  fiat_p256_uint1 x108;
+  uint32_t x109;
+  uint32_t x110;
+  fiat_p256_uint1 x111;
+  uint32_t x112;
+  fiat_p256_uint1 x113;
+  uint32_t x114;
+  fiat_p256_uint1 x115;
+  uint32_t x116;
+  fiat_p256_uint1 x117;
+  uint32_t x118;
+  fiat_p256_uint1 x119;
+  uint32_t x120;
+  fiat_p256_uint1 x121;
+  uint32_t x122;
+  fiat_p256_uint1 x123;
+  uint32_t x124;
+  fiat_p256_uint1 x125;
+  uint32_t x126;
+  uint32_t x127;
+  uint32_t x128;
+  uint32_t x129;
+  uint32_t x130;
+  uint32_t x131;
+  uint32_t x132;
+  uint32_t x133;
+  fiat_p256_uint1 x134;
+  uint32_t x135;
+  uint32_t x136;
+  uint32_t x137;
+  uint32_t x138;
+  uint32_t x139;
+  uint32_t x140;
+  uint32_t x141;
+  uint32_t x142;
+  uint32_t x143;
+  uint32_t x144;
+  fiat_p256_uint1 x145;
+  uint32_t x146;
+  fiat_p256_uint1 x147;
+  uint32_t x148;
+  fiat_p256_uint1 x149;
+  uint32_t x150;
+  fiat_p256_uint1 x151;
+  uint32_t x152;
+  fiat_p256_uint1 x153;
+  uint32_t x154;
+  fiat_p256_uint1 x155;
+  uint32_t x156;
+  fiat_p256_uint1 x157;
+  uint32_t x158;
+  fiat_p256_uint1 x159;
+  uint32_t x160;
+  fiat_p256_uint1 x161;
+  uint32_t x162;
+  uint32_t x163;
+  uint32_t x164;
+  uint32_t x165;
+  uint32_t x166;
+  uint32_t x167;
+  uint32_t x168;
+  uint32_t x169;
+  uint32_t x170;
+  fiat_p256_uint1 x171;
+  uint32_t x172;
+  fiat_p256_uint1 x173;
+  uint32_t x174;
+  fiat_p256_uint1 x175;
+  uint32_t x176;
+  fiat_p256_uint1 x177;
+  uint32_t x178;
+  fiat_p256_uint1 x179;
+  uint32_t x180;
+  fiat_p256_uint1 x181;
+  uint32_t x182;
+  fiat_p256_uint1 x183;
+  uint32_t x184;
+  fiat_p256_uint1 x185;
+  uint32_t x186;
+  fiat_p256_uint1 x187;
+  uint32_t x188;
+  fiat_p256_uint1 x189;
+  uint32_t x190;
+  fiat_p256_uint1 x191;
+  uint32_t x192;
+  fiat_p256_uint1 x193;
+  uint32_t x194;
+  fiat_p256_uint1 x195;
+  uint32_t x196;
+  fiat_p256_uint1 x197;
+  uint32_t x198;
+  fiat_p256_uint1 x199;
+  uint32_t x200;
+  fiat_p256_uint1 x201;
+  uint32_t x202;
+  fiat_p256_uint1 x203;
+  uint32_t x204;
+  fiat_p256_uint1 x205;
+  uint32_t x206;
+  uint32_t x207;
+  uint32_t x208;
+  uint32_t x209;
+  uint32_t x210;
+  uint32_t x211;
+  uint32_t x212;
+  uint32_t x213;
+  uint32_t x214;
+  uint32_t x215;
+  uint32_t x216;
+  uint32_t x217;
+  uint32_t x218;
+  uint32_t x219;
+  uint32_t x220;
+  uint32_t x221;
+  uint32_t x222;
+  uint32_t x223;
+  uint32_t x224;
+  uint32_t x225;
+  uint32_t x226;
+  uint32_t x227;
+  uint32_t x228;
+  uint32_t x229;
+  uint32_t x230;
+  fiat_p256_addcarryx_u32(&x1, &x2, 0x0, (~arg1), 0x1);
+  x3 = (fiat_p256_uint1)((fiat_p256_uint1)(x1 >> 31) & (fiat_p256_uint1)((arg3[0]) & 0x1));
+  fiat_p256_addcarryx_u32(&x4, &x5, 0x0, (~arg1), 0x1);
+  fiat_p256_cmovznz_u32(&x6, x3, arg1, x4);
+  fiat_p256_cmovznz_u32(&x7, x3, (arg2[0]), (arg3[0]));
+  fiat_p256_cmovznz_u32(&x8, x3, (arg2[1]), (arg3[1]));
+  fiat_p256_cmovznz_u32(&x9, x3, (arg2[2]), (arg3[2]));
+  fiat_p256_cmovznz_u32(&x10, x3, (arg2[3]), (arg3[3]));
+  fiat_p256_cmovznz_u32(&x11, x3, (arg2[4]), (arg3[4]));
+  fiat_p256_cmovznz_u32(&x12, x3, (arg2[5]), (arg3[5]));
+  fiat_p256_cmovznz_u32(&x13, x3, (arg2[6]), (arg3[6]));
+  fiat_p256_cmovznz_u32(&x14, x3, (arg2[7]), (arg3[7]));
+  fiat_p256_cmovznz_u32(&x15, x3, (arg2[8]), (arg3[8]));
+  fiat_p256_addcarryx_u32(&x16, &x17, 0x0, 0x1, (~(arg2[0])));
+  fiat_p256_addcarryx_u32(&x18, &x19, x17, 0x0, (~(arg2[1])));
+  fiat_p256_addcarryx_u32(&x20, &x21, x19, 0x0, (~(arg2[2])));
+  fiat_p256_addcarryx_u32(&x22, &x23, x21, 0x0, (~(arg2[3])));
+  fiat_p256_addcarryx_u32(&x24, &x25, x23, 0x0, (~(arg2[4])));
+  fiat_p256_addcarryx_u32(&x26, &x27, x25, 0x0, (~(arg2[5])));
+  fiat_p256_addcarryx_u32(&x28, &x29, x27, 0x0, (~(arg2[6])));
+  fiat_p256_addcarryx_u32(&x30, &x31, x29, 0x0, (~(arg2[7])));
+  fiat_p256_addcarryx_u32(&x32, &x33, x31, 0x0, (~(arg2[8])));
+  fiat_p256_cmovznz_u32(&x34, x3, (arg3[0]), x16);
+  fiat_p256_cmovznz_u32(&x35, x3, (arg3[1]), x18);
+  fiat_p256_cmovznz_u32(&x36, x3, (arg3[2]), x20);
+  fiat_p256_cmovznz_u32(&x37, x3, (arg3[3]), x22);
+  fiat_p256_cmovznz_u32(&x38, x3, (arg3[4]), x24);
+  fiat_p256_cmovznz_u32(&x39, x3, (arg3[5]), x26);
+  fiat_p256_cmovznz_u32(&x40, x3, (arg3[6]), x28);
+  fiat_p256_cmovznz_u32(&x41, x3, (arg3[7]), x30);
+  fiat_p256_cmovznz_u32(&x42, x3, (arg3[8]), x32);
+  fiat_p256_cmovznz_u32(&x43, x3, (arg4[0]), (arg5[0]));
+  fiat_p256_cmovznz_u32(&x44, x3, (arg4[1]), (arg5[1]));
+  fiat_p256_cmovznz_u32(&x45, x3, (arg4[2]), (arg5[2]));
+  fiat_p256_cmovznz_u32(&x46, x3, (arg4[3]), (arg5[3]));
+  fiat_p256_cmovznz_u32(&x47, x3, (arg4[4]), (arg5[4]));
+  fiat_p256_cmovznz_u32(&x48, x3, (arg4[5]), (arg5[5]));
+  fiat_p256_cmovznz_u32(&x49, x3, (arg4[6]), (arg5[6]));
+  fiat_p256_cmovznz_u32(&x50, x3, (arg4[7]), (arg5[7]));
+  fiat_p256_addcarryx_u32(&x51, &x52, 0x0, x43, x43);
+  fiat_p256_addcarryx_u32(&x53, &x54, x52, x44, x44);
+  fiat_p256_addcarryx_u32(&x55, &x56, x54, x45, x45);
+  fiat_p256_addcarryx_u32(&x57, &x58, x56, x46, x46);
+  fiat_p256_addcarryx_u32(&x59, &x60, x58, x47, x47);
+  fiat_p256_addcarryx_u32(&x61, &x62, x60, x48, x48);
+  fiat_p256_addcarryx_u32(&x63, &x64, x62, x49, x49);
+  fiat_p256_addcarryx_u32(&x65, &x66, x64, x50, x50);
+  fiat_p256_subborrowx_u32(&x67, &x68, 0x0, x51, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x69, &x70, x68, x53, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x71, &x72, x70, x55, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x73, &x74, x72, x57, 0x0);
+  fiat_p256_subborrowx_u32(&x75, &x76, x74, x59, 0x0);
+  fiat_p256_subborrowx_u32(&x77, &x78, x76, x61, 0x0);
+  fiat_p256_subborrowx_u32(&x79, &x80, x78, x63, 0x1);
+  fiat_p256_subborrowx_u32(&x81, &x82, x80, x65, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x83, &x84, x82, x66, 0x0);
+  x85 = (arg4[7]);
+  x86 = (arg4[6]);
+  x87 = (arg4[5]);
+  x88 = (arg4[4]);
+  x89 = (arg4[3]);
+  x90 = (arg4[2]);
+  x91 = (arg4[1]);
+  x92 = (arg4[0]);
+  fiat_p256_subborrowx_u32(&x93, &x94, 0x0, 0x0, x92);
+  fiat_p256_subborrowx_u32(&x95, &x96, x94, 0x0, x91);
+  fiat_p256_subborrowx_u32(&x97, &x98, x96, 0x0, x90);
+  fiat_p256_subborrowx_u32(&x99, &x100, x98, 0x0, x89);
+  fiat_p256_subborrowx_u32(&x101, &x102, x100, 0x0, x88);
+  fiat_p256_subborrowx_u32(&x103, &x104, x102, 0x0, x87);
+  fiat_p256_subborrowx_u32(&x105, &x106, x104, 0x0, x86);
+  fiat_p256_subborrowx_u32(&x107, &x108, x106, 0x0, x85);
+  fiat_p256_cmovznz_u32(&x109, x108, 0x0, UINT32_C(0xffffffff));
+  fiat_p256_addcarryx_u32(&x110, &x111, 0x0, x93, x109);
+  fiat_p256_addcarryx_u32(&x112, &x113, x111, x95, x109);
+  fiat_p256_addcarryx_u32(&x114, &x115, x113, x97, x109);
+  fiat_p256_addcarryx_u32(&x116, &x117, x115, x99, 0x0);
+  fiat_p256_addcarryx_u32(&x118, &x119, x117, x101, 0x0);
+  fiat_p256_addcarryx_u32(&x120, &x121, x119, x103, 0x0);
+  fiat_p256_addcarryx_u32(&x122, &x123, x121, x105, (fiat_p256_uint1)(x109 & 0x1));
+  fiat_p256_addcarryx_u32(&x124, &x125, x123, x107, x109);
+  fiat_p256_cmovznz_u32(&x126, x3, (arg5[0]), x110);
+  fiat_p256_cmovznz_u32(&x127, x3, (arg5[1]), x112);
+  fiat_p256_cmovznz_u32(&x128, x3, (arg5[2]), x114);
+  fiat_p256_cmovznz_u32(&x129, x3, (arg5[3]), x116);
+  fiat_p256_cmovznz_u32(&x130, x3, (arg5[4]), x118);
+  fiat_p256_cmovznz_u32(&x131, x3, (arg5[5]), x120);
+  fiat_p256_cmovznz_u32(&x132, x3, (arg5[6]), x122);
+  fiat_p256_cmovznz_u32(&x133, x3, (arg5[7]), x124);
+  x134 = (fiat_p256_uint1)(x34 & 0x1);
+  fiat_p256_cmovznz_u32(&x135, x134, 0x0, x7);
+  fiat_p256_cmovznz_u32(&x136, x134, 0x0, x8);
+  fiat_p256_cmovznz_u32(&x137, x134, 0x0, x9);
+  fiat_p256_cmovznz_u32(&x138, x134, 0x0, x10);
+  fiat_p256_cmovznz_u32(&x139, x134, 0x0, x11);
+  fiat_p256_cmovznz_u32(&x140, x134, 0x0, x12);
+  fiat_p256_cmovznz_u32(&x141, x134, 0x0, x13);
+  fiat_p256_cmovznz_u32(&x142, x134, 0x0, x14);
+  fiat_p256_cmovznz_u32(&x143, x134, 0x0, x15);
+  fiat_p256_addcarryx_u32(&x144, &x145, 0x0, x34, x135);
+  fiat_p256_addcarryx_u32(&x146, &x147, x145, x35, x136);
+  fiat_p256_addcarryx_u32(&x148, &x149, x147, x36, x137);
+  fiat_p256_addcarryx_u32(&x150, &x151, x149, x37, x138);
+  fiat_p256_addcarryx_u32(&x152, &x153, x151, x38, x139);
+  fiat_p256_addcarryx_u32(&x154, &x155, x153, x39, x140);
+  fiat_p256_addcarryx_u32(&x156, &x157, x155, x40, x141);
+  fiat_p256_addcarryx_u32(&x158, &x159, x157, x41, x142);
+  fiat_p256_addcarryx_u32(&x160, &x161, x159, x42, x143);
+  fiat_p256_cmovznz_u32(&x162, x134, 0x0, x43);
+  fiat_p256_cmovznz_u32(&x163, x134, 0x0, x44);
+  fiat_p256_cmovznz_u32(&x164, x134, 0x0, x45);
+  fiat_p256_cmovznz_u32(&x165, x134, 0x0, x46);
+  fiat_p256_cmovznz_u32(&x166, x134, 0x0, x47);
+  fiat_p256_cmovznz_u32(&x167, x134, 0x0, x48);
+  fiat_p256_cmovznz_u32(&x168, x134, 0x0, x49);
+  fiat_p256_cmovznz_u32(&x169, x134, 0x0, x50);
+  fiat_p256_addcarryx_u32(&x170, &x171, 0x0, x126, x162);
+  fiat_p256_addcarryx_u32(&x172, &x173, x171, x127, x163);
+  fiat_p256_addcarryx_u32(&x174, &x175, x173, x128, x164);
+  fiat_p256_addcarryx_u32(&x176, &x177, x175, x129, x165);
+  fiat_p256_addcarryx_u32(&x178, &x179, x177, x130, x166);
+  fiat_p256_addcarryx_u32(&x180, &x181, x179, x131, x167);
+  fiat_p256_addcarryx_u32(&x182, &x183, x181, x132, x168);
+  fiat_p256_addcarryx_u32(&x184, &x185, x183, x133, x169);
+  fiat_p256_subborrowx_u32(&x186, &x187, 0x0, x170, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x188, &x189, x187, x172, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x190, &x191, x189, x174, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x192, &x193, x191, x176, 0x0);
+  fiat_p256_subborrowx_u32(&x194, &x195, x193, x178, 0x0);
+  fiat_p256_subborrowx_u32(&x196, &x197, x195, x180, 0x0);
+  fiat_p256_subborrowx_u32(&x198, &x199, x197, x182, 0x1);
+  fiat_p256_subborrowx_u32(&x200, &x201, x199, x184, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u32(&x202, &x203, x201, x185, 0x0);
+  fiat_p256_addcarryx_u32(&x204, &x205, 0x0, x6, 0x1);
+  x206 = ((x144 >> 1) | ((x146 << 31) & UINT32_C(0xffffffff)));
+  x207 = ((x146 >> 1) | ((x148 << 31) & UINT32_C(0xffffffff)));
+  x208 = ((x148 >> 1) | ((x150 << 31) & UINT32_C(0xffffffff)));
+  x209 = ((x150 >> 1) | ((x152 << 31) & UINT32_C(0xffffffff)));
+  x210 = ((x152 >> 1) | ((x154 << 31) & UINT32_C(0xffffffff)));
+  x211 = ((x154 >> 1) | ((x156 << 31) & UINT32_C(0xffffffff)));
+  x212 = ((x156 >> 1) | ((x158 << 31) & UINT32_C(0xffffffff)));
+  x213 = ((x158 >> 1) | ((x160 << 31) & UINT32_C(0xffffffff)));
+  x214 = ((x160 & UINT32_C(0x80000000)) | (x160 >> 1));
+  fiat_p256_cmovznz_u32(&x215, x84, x67, x51);
+  fiat_p256_cmovznz_u32(&x216, x84, x69, x53);
+  fiat_p256_cmovznz_u32(&x217, x84, x71, x55);
+  fiat_p256_cmovznz_u32(&x218, x84, x73, x57);
+  fiat_p256_cmovznz_u32(&x219, x84, x75, x59);
+  fiat_p256_cmovznz_u32(&x220, x84, x77, x61);
+  fiat_p256_cmovznz_u32(&x221, x84, x79, x63);
+  fiat_p256_cmovznz_u32(&x222, x84, x81, x65);
+  fiat_p256_cmovznz_u32(&x223, x203, x186, x170);
+  fiat_p256_cmovznz_u32(&x224, x203, x188, x172);
+  fiat_p256_cmovznz_u32(&x225, x203, x190, x174);
+  fiat_p256_cmovznz_u32(&x226, x203, x192, x176);
+  fiat_p256_cmovznz_u32(&x227, x203, x194, x178);
+  fiat_p256_cmovznz_u32(&x228, x203, x196, x180);
+  fiat_p256_cmovznz_u32(&x229, x203, x198, x182);
+  fiat_p256_cmovznz_u32(&x230, x203, x200, x184);
+  *out1 = x204;
+  out2[0] = x7;
+  out2[1] = x8;
+  out2[2] = x9;
+  out2[3] = x10;
+  out2[4] = x11;
+  out2[5] = x12;
+  out2[6] = x13;
+  out2[7] = x14;
+  out2[8] = x15;
+  out3[0] = x206;
+  out3[1] = x207;
+  out3[2] = x208;
+  out3[3] = x209;
+  out3[4] = x210;
+  out3[5] = x211;
+  out3[6] = x212;
+  out3[7] = x213;
+  out3[8] = x214;
+  out4[0] = x215;
+  out4[1] = x216;
+  out4[2] = x217;
+  out4[3] = x218;
+  out4[4] = x219;
+  out4[5] = x220;
+  out4[6] = x221;
+  out4[7] = x222;
+  out5[0] = x223;
+  out5[1] = x224;
+  out5[2] = x225;
+  out5[3] = x226;
+  out5[4] = x227;
+  out5[5] = x228;
+  out5[6] = x229;
+  out5[7] = x230;
+}
+
+/*
+ * The function fiat_p256_divstep_precomp returns the precomputed value for Bernstein-Yang-inversion (in montgomery form).
+ *
+ * Postconditions:
+ *   eval (from_montgomery out1) = ⌊(m - 1) / 2⌋^(if ⌊log2 m⌋ + 1 < 46 then ⌊(49 * (⌊log2 m⌋ + 1) + 80) / 17⌋ else ⌊(49 * (⌊log2 m⌋ + 1) + 57) / 17⌋)
+ *   0 ≤ eval out1 < m
+ *
+ * Output Bounds:
+ *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ */
+static FIAT_P256_FIAT_INLINE void fiat_p256_divstep_precomp(uint32_t out1[8]) {
+  out1[0] = UINT32_C(0xb8000000);
+  out1[1] = UINT32_C(0x67ffffff);
+  out1[2] = UINT32_C(0x38000000);
+  out1[3] = UINT32_C(0xc0000000);
+  out1[4] = UINT32_C(0x7fffffff);
+  out1[5] = UINT32_C(0xd8000000);
+  out1[6] = UINT32_C(0xffffffff);
+  out1[7] = UINT32_C(0x2fffffff);
+}
diff --git a/src/third_party/fiat/p256_64.h b/src/third_party/fiat/p256_64.h
index 773266a..c772638 100644
--- a/src/third_party/fiat/p256_64.h
+++ b/src/third_party/fiat/p256_64.h
@@ -1,8 +1,8 @@
-/* Autogenerated: src/ExtractionOCaml/word_by_word_montgomery --static p256 '2^256 - 2^224 + 2^192 + 2^96 - 1' 64 mul square add sub opp from_montgomery nonzero selectznz to_bytes from_bytes */
+/* Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' --inline --static --use-value-barrier p256 64 '2^256 - 2^224 + 2^192 + 2^96 - 1' mul square add sub opp from_montgomery to_montgomery nonzero selectznz to_bytes from_bytes one msat divstep divstep_precomp */
 /* curve description: p256 */
-/* requested operations: mul, square, add, sub, opp, from_montgomery, nonzero, selectznz, to_bytes, from_bytes */
-/* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */
 /* machine_wordsize = 64 (from "64") */
+/* requested operations: mul, square, add, sub, opp, from_montgomery, to_montgomery, nonzero, selectznz, to_bytes, from_bytes, one, msat, divstep, divstep_precomp */
+/* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */
 /*                                                                    */
 /* NOTE: In addition to the bounds specified above each function, all */
 /*   functions synthesized for this Montgomery arithmetic require the */
@@ -10,20 +10,52 @@
 /*   require the input to be in the unique saturated representation.  */
 /*   All functions also ensure that these two properties are true of  */
 /*   return values.                                                   */
+/*  */
+/* Computed values: */
+/*   eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) */
+/*   bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) */
+/*   twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) in */
+/*                            if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256 */
 
 #include <stdint.h>
 typedef unsigned char fiat_p256_uint1;
 typedef signed char fiat_p256_int1;
-typedef signed __int128 fiat_p256_int128;
-typedef unsigned __int128 fiat_p256_uint128;
+#if defined(__GNUC__) || defined(__clang__)
+#  define FIAT_P256_FIAT_EXTENSION __extension__
+#  define FIAT_P256_FIAT_INLINE __inline__
+#else
+#  define FIAT_P256_FIAT_EXTENSION
+#  define FIAT_P256_FIAT_INLINE
+#endif
+
+FIAT_P256_FIAT_EXTENSION typedef signed __int128 fiat_p256_int128;
+FIAT_P256_FIAT_EXTENSION typedef unsigned __int128 fiat_p256_uint128;
+
+/* The type fiat_p256_montgomery_domain_field_element is a field element in the Montgomery domain. */
+/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */
+typedef uint64_t fiat_p256_montgomery_domain_field_element[4];
+
+/* The type fiat_p256_non_montgomery_domain_field_element is a field element NOT in the Montgomery domain. */
+/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */
+typedef uint64_t fiat_p256_non_montgomery_domain_field_element[4];
 
 #if (-1 & 3) != 3
 #error "This code only works on a two's complement system"
 #endif
 
+#if !defined(FIAT_P256_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
+static __inline__ uint64_t fiat_p256_value_barrier_u64(uint64_t a) {
+  __asm__("" : "+r"(a) : /* no inputs */);
+  return a;
+}
+#else
+#  define fiat_p256_value_barrier_u64(x) (x)
+#endif
+
 
 /*
  * The function fiat_p256_addcarryx_u64 is an addition with carry.
+ *
  * Postconditions:
  *   out1 = (arg1 + arg2 + arg3) mod 2^64
  *   out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
@@ -36,16 +68,20 @@
  *   out1: [0x0 ~> 0xffffffffffffffff]
  *   out2: [0x0 ~> 0x1]
  */
-static void fiat_p256_addcarryx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
-  fiat_p256_uint128 x1 = ((arg1 + (fiat_p256_uint128)arg2) + arg3);
-  uint64_t x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
-  fiat_p256_uint1 x3 = (fiat_p256_uint1)(x1 >> 64);
+static FIAT_P256_FIAT_INLINE void fiat_p256_addcarryx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
+  fiat_p256_uint128 x1;
+  uint64_t x2;
+  fiat_p256_uint1 x3;
+  x1 = ((arg1 + (fiat_p256_uint128)arg2) + arg3);
+  x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
+  x3 = (fiat_p256_uint1)(x1 >> 64);
   *out1 = x2;
   *out2 = x3;
 }
 
 /*
  * The function fiat_p256_subborrowx_u64 is a subtraction with borrow.
+ *
  * Postconditions:
  *   out1 = (-arg1 + arg2 + -arg3) mod 2^64
  *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
@@ -58,16 +94,20 @@
  *   out1: [0x0 ~> 0xffffffffffffffff]
  *   out2: [0x0 ~> 0x1]
  */
-static void fiat_p256_subborrowx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
-  fiat_p256_int128 x1 = ((arg2 - (fiat_p256_int128)arg1) - arg3);
-  fiat_p256_int1 x2 = (fiat_p256_int1)(x1 >> 64);
-  uint64_t x3 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
+static FIAT_P256_FIAT_INLINE void fiat_p256_subborrowx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
+  fiat_p256_int128 x1;
+  fiat_p256_int1 x2;
+  uint64_t x3;
+  x1 = ((arg2 - (fiat_p256_int128)arg1) - arg3);
+  x2 = (fiat_p256_int1)(x1 >> 64);
+  x3 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
   *out1 = x3;
   *out2 = (fiat_p256_uint1)(0x0 - x2);
 }
 
 /*
  * The function fiat_p256_mulx_u64 is a multiplication, returning the full double-width result.
+ *
  * Postconditions:
  *   out1 = (arg1 * arg2) mod 2^64
  *   out2 = ⌊arg1 * arg2 / 2^64⌋
@@ -79,16 +119,20 @@
  *   out1: [0x0 ~> 0xffffffffffffffff]
  *   out2: [0x0 ~> 0xffffffffffffffff]
  */
-static void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, uint64_t arg2) {
-  fiat_p256_uint128 x1 = ((fiat_p256_uint128)arg1 * arg2);
-  uint64_t x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
-  uint64_t x3 = (uint64_t)(x1 >> 64);
+static FIAT_P256_FIAT_INLINE void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, uint64_t arg2) {
+  fiat_p256_uint128 x1;
+  uint64_t x2;
+  uint64_t x3;
+  x1 = ((fiat_p256_uint128)arg1 * arg2);
+  x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
+  x3 = (uint64_t)(x1 >> 64);
   *out1 = x2;
   *out2 = x3;
 }
 
 /*
  * The function fiat_p256_cmovznz_u64 is a single-word conditional move.
+ *
  * Postconditions:
  *   out1 = (if arg1 = 0 then arg2 else arg3)
  *
@@ -99,21 +143,19 @@
  * Output Bounds:
  *   out1: [0x0 ~> 0xffffffffffffffff]
  */
-static void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
-  fiat_p256_uint1 x1 = (!(!arg1));
-  uint64_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
-  // Note this line has been patched from the synthesized code to add value
-  // barriers.
-  //
-  // Clang recognizes this pattern as a select. While it usually transforms it
-  // to a cmov, it sometimes further transforms it into a branch, which we do
-  // not want.
-  uint64_t x3 = ((value_barrier_u64(x2) & arg3) | (value_barrier_u64(~x2) & arg2));
+static FIAT_P256_FIAT_INLINE void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
+  fiat_p256_uint1 x1;
+  uint64_t x2;
+  uint64_t x3;
+  x1 = (!(!arg1));
+  x2 = ((fiat_p256_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
+  x3 = ((fiat_p256_value_barrier_u64(x2) & arg3) | (fiat_p256_value_barrier_u64((~x2)) & arg2));
   *out1 = x3;
 }
 
 /*
  * The function fiat_p256_mul multiplies two field elements in the Montgomery domain.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  *   0 ≤ eval arg2 < m
@@ -121,287 +163,297 @@
  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
  *   0 ≤ eval out1 < m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
- *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
  */
-static void fiat_p256_mul(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
-  uint64_t x1 = (arg1[1]);
-  uint64_t x2 = (arg1[2]);
-  uint64_t x3 = (arg1[3]);
-  uint64_t x4 = (arg1[0]);
+static FIAT_P256_FIAT_INLINE void fiat_p256_mul(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
   uint64_t x5;
   uint64_t x6;
-  fiat_p256_mulx_u64(&x5, &x6, x4, (arg2[3]));
   uint64_t x7;
   uint64_t x8;
-  fiat_p256_mulx_u64(&x7, &x8, x4, (arg2[2]));
   uint64_t x9;
   uint64_t x10;
-  fiat_p256_mulx_u64(&x9, &x10, x4, (arg2[1]));
   uint64_t x11;
   uint64_t x12;
-  fiat_p256_mulx_u64(&x11, &x12, x4, (arg2[0]));
   uint64_t x13;
   fiat_p256_uint1 x14;
-  fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
   uint64_t x15;
   fiat_p256_uint1 x16;
-  fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
   uint64_t x17;
   fiat_p256_uint1 x18;
-  fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
-  uint64_t x19 = (x18 + x6);
+  uint64_t x19;
   uint64_t x20;
   uint64_t x21;
-  fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
   uint64_t x22;
   uint64_t x23;
-  fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
   uint64_t x24;
   uint64_t x25;
-  fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
   uint64_t x26;
   fiat_p256_uint1 x27;
-  fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
-  uint64_t x28 = (x27 + x23);
+  uint64_t x28;
   uint64_t x29;
   fiat_p256_uint1 x30;
-  fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
   uint64_t x31;
   fiat_p256_uint1 x32;
-  fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
   uint64_t x33;
   fiat_p256_uint1 x34;
-  fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
   uint64_t x35;
   fiat_p256_uint1 x36;
-  fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
   uint64_t x37;
   fiat_p256_uint1 x38;
-  fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
   uint64_t x39;
   uint64_t x40;
-  fiat_p256_mulx_u64(&x39, &x40, x1, (arg2[3]));
   uint64_t x41;
   uint64_t x42;
-  fiat_p256_mulx_u64(&x41, &x42, x1, (arg2[2]));
   uint64_t x43;
   uint64_t x44;
-  fiat_p256_mulx_u64(&x43, &x44, x1, (arg2[1]));
   uint64_t x45;
   uint64_t x46;
-  fiat_p256_mulx_u64(&x45, &x46, x1, (arg2[0]));
   uint64_t x47;
   fiat_p256_uint1 x48;
-  fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
   uint64_t x49;
   fiat_p256_uint1 x50;
-  fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
   uint64_t x51;
   fiat_p256_uint1 x52;
-  fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
-  uint64_t x53 = (x52 + x40);
+  uint64_t x53;
   uint64_t x54;
   fiat_p256_uint1 x55;
-  fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
   uint64_t x56;
   fiat_p256_uint1 x57;
-  fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
   uint64_t x58;
   fiat_p256_uint1 x59;
-  fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
   uint64_t x60;
   fiat_p256_uint1 x61;
-  fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
   uint64_t x62;
   fiat_p256_uint1 x63;
-  fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
   uint64_t x64;
   uint64_t x65;
-  fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
   uint64_t x66;
   uint64_t x67;
-  fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
   uint64_t x68;
   uint64_t x69;
-  fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
   uint64_t x70;
   fiat_p256_uint1 x71;
-  fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
-  uint64_t x72 = (x71 + x67);
+  uint64_t x72;
   uint64_t x73;
   fiat_p256_uint1 x74;
-  fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
   uint64_t x75;
   fiat_p256_uint1 x76;
-  fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
   uint64_t x77;
   fiat_p256_uint1 x78;
-  fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
   uint64_t x79;
   fiat_p256_uint1 x80;
-  fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
   uint64_t x81;
   fiat_p256_uint1 x82;
-  fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
-  uint64_t x83 = ((uint64_t)x82 + x63);
+  uint64_t x83;
   uint64_t x84;
   uint64_t x85;
-  fiat_p256_mulx_u64(&x84, &x85, x2, (arg2[3]));
   uint64_t x86;
   uint64_t x87;
-  fiat_p256_mulx_u64(&x86, &x87, x2, (arg2[2]));
   uint64_t x88;
   uint64_t x89;
-  fiat_p256_mulx_u64(&x88, &x89, x2, (arg2[1]));
   uint64_t x90;
   uint64_t x91;
-  fiat_p256_mulx_u64(&x90, &x91, x2, (arg2[0]));
   uint64_t x92;
   fiat_p256_uint1 x93;
-  fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
   uint64_t x94;
   fiat_p256_uint1 x95;
-  fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
   uint64_t x96;
   fiat_p256_uint1 x97;
-  fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
-  uint64_t x98 = (x97 + x85);
+  uint64_t x98;
   uint64_t x99;
   fiat_p256_uint1 x100;
-  fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
   uint64_t x101;
   fiat_p256_uint1 x102;
-  fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
   uint64_t x103;
   fiat_p256_uint1 x104;
-  fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
   uint64_t x105;
   fiat_p256_uint1 x106;
-  fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
   uint64_t x107;
   fiat_p256_uint1 x108;
-  fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
   uint64_t x109;
   uint64_t x110;
-  fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
   uint64_t x111;
   uint64_t x112;
-  fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
   uint64_t x113;
   uint64_t x114;
-  fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
   uint64_t x115;
   fiat_p256_uint1 x116;
-  fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
-  uint64_t x117 = (x116 + x112);
+  uint64_t x117;
   uint64_t x118;
   fiat_p256_uint1 x119;
-  fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
   uint64_t x120;
   fiat_p256_uint1 x121;
-  fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
   uint64_t x122;
   fiat_p256_uint1 x123;
-  fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
   uint64_t x124;
   fiat_p256_uint1 x125;
-  fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
   uint64_t x126;
   fiat_p256_uint1 x127;
-  fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
-  uint64_t x128 = ((uint64_t)x127 + x108);
+  uint64_t x128;
   uint64_t x129;
   uint64_t x130;
-  fiat_p256_mulx_u64(&x129, &x130, x3, (arg2[3]));
   uint64_t x131;
   uint64_t x132;
-  fiat_p256_mulx_u64(&x131, &x132, x3, (arg2[2]));
   uint64_t x133;
   uint64_t x134;
-  fiat_p256_mulx_u64(&x133, &x134, x3, (arg2[1]));
   uint64_t x135;
   uint64_t x136;
-  fiat_p256_mulx_u64(&x135, &x136, x3, (arg2[0]));
   uint64_t x137;
   fiat_p256_uint1 x138;
-  fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
   uint64_t x139;
   fiat_p256_uint1 x140;
-  fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
   uint64_t x141;
   fiat_p256_uint1 x142;
-  fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
-  uint64_t x143 = (x142 + x130);
+  uint64_t x143;
   uint64_t x144;
   fiat_p256_uint1 x145;
-  fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
   uint64_t x146;
   fiat_p256_uint1 x147;
-  fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
   uint64_t x148;
   fiat_p256_uint1 x149;
-  fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
   uint64_t x150;
   fiat_p256_uint1 x151;
-  fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
   uint64_t x152;
   fiat_p256_uint1 x153;
-  fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
   uint64_t x154;
   uint64_t x155;
-  fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
   uint64_t x156;
   uint64_t x157;
-  fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
   uint64_t x158;
   uint64_t x159;
-  fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
   uint64_t x160;
   fiat_p256_uint1 x161;
-  fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
-  uint64_t x162 = (x161 + x157);
+  uint64_t x162;
   uint64_t x163;
   fiat_p256_uint1 x164;
-  fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
   uint64_t x165;
   fiat_p256_uint1 x166;
-  fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
   uint64_t x167;
   fiat_p256_uint1 x168;
-  fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
   uint64_t x169;
   fiat_p256_uint1 x170;
-  fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
   uint64_t x171;
   fiat_p256_uint1 x172;
-  fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
-  uint64_t x173 = ((uint64_t)x172 + x153);
+  uint64_t x173;
   uint64_t x174;
   fiat_p256_uint1 x175;
-  fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
   uint64_t x176;
   fiat_p256_uint1 x177;
-  fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
   uint64_t x178;
   fiat_p256_uint1 x179;
-  fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
   uint64_t x180;
   fiat_p256_uint1 x181;
-  fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
   uint64_t x182;
   fiat_p256_uint1 x183;
-  fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
   uint64_t x184;
-  fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
   uint64_t x185;
-  fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
   uint64_t x186;
-  fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
   uint64_t x187;
+  x1 = (arg1[1]);
+  x2 = (arg1[2]);
+  x3 = (arg1[3]);
+  x4 = (arg1[0]);
+  fiat_p256_mulx_u64(&x5, &x6, x4, (arg2[3]));
+  fiat_p256_mulx_u64(&x7, &x8, x4, (arg2[2]));
+  fiat_p256_mulx_u64(&x9, &x10, x4, (arg2[1]));
+  fiat_p256_mulx_u64(&x11, &x12, x4, (arg2[0]));
+  fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
+  fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
+  fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
+  x19 = (x18 + x6);
+  fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
+  x28 = (x27 + x23);
+  fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
+  fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
+  fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
+  fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
+  fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
+  fiat_p256_mulx_u64(&x39, &x40, x1, (arg2[3]));
+  fiat_p256_mulx_u64(&x41, &x42, x1, (arg2[2]));
+  fiat_p256_mulx_u64(&x43, &x44, x1, (arg2[1]));
+  fiat_p256_mulx_u64(&x45, &x46, x1, (arg2[0]));
+  fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
+  fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
+  fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
+  x53 = (x52 + x40);
+  fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
+  fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
+  fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
+  fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
+  fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
+  fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
+  x72 = (x71 + x67);
+  fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
+  fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
+  fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
+  fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
+  fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
+  x83 = ((uint64_t)x82 + x63);
+  fiat_p256_mulx_u64(&x84, &x85, x2, (arg2[3]));
+  fiat_p256_mulx_u64(&x86, &x87, x2, (arg2[2]));
+  fiat_p256_mulx_u64(&x88, &x89, x2, (arg2[1]));
+  fiat_p256_mulx_u64(&x90, &x91, x2, (arg2[0]));
+  fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
+  fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
+  fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
+  x98 = (x97 + x85);
+  fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
+  fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
+  fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
+  fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
+  fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
+  fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
+  x117 = (x116 + x112);
+  fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
+  fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
+  fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
+  fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
+  fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
+  x128 = ((uint64_t)x127 + x108);
+  fiat_p256_mulx_u64(&x129, &x130, x3, (arg2[3]));
+  fiat_p256_mulx_u64(&x131, &x132, x3, (arg2[2]));
+  fiat_p256_mulx_u64(&x133, &x134, x3, (arg2[1]));
+  fiat_p256_mulx_u64(&x135, &x136, x3, (arg2[0]));
+  fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
+  fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
+  fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
+  x143 = (x142 + x130);
+  fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
+  fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
+  fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
+  fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
+  fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
+  fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
+  x162 = (x161 + x157);
+  fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
+  fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
+  fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
+  fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
+  fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
+  x173 = ((uint64_t)x172 + x153);
+  fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
+  fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
+  fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
+  fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
+  fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
+  fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
+  fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
   fiat_p256_cmovznz_u64(&x187, x183, x180, x171);
   out1[0] = x184;
   out1[1] = x185;
@@ -411,292 +463,304 @@
 
 /*
  * The function fiat_p256_square squares a field element in the Montgomery domain.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  * Postconditions:
  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
  *   0 ≤ eval out1 < m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
  */
-static void fiat_p256_square(uint64_t out1[4], const uint64_t arg1[4]) {
-  uint64_t x1 = (arg1[1]);
-  uint64_t x2 = (arg1[2]);
-  uint64_t x3 = (arg1[3]);
-  uint64_t x4 = (arg1[0]);
+static FIAT_P256_FIAT_INLINE void fiat_p256_square(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
   uint64_t x5;
   uint64_t x6;
-  fiat_p256_mulx_u64(&x5, &x6, x4, (arg1[3]));
   uint64_t x7;
   uint64_t x8;
-  fiat_p256_mulx_u64(&x7, &x8, x4, (arg1[2]));
   uint64_t x9;
   uint64_t x10;
-  fiat_p256_mulx_u64(&x9, &x10, x4, (arg1[1]));
   uint64_t x11;
   uint64_t x12;
-  fiat_p256_mulx_u64(&x11, &x12, x4, (arg1[0]));
   uint64_t x13;
   fiat_p256_uint1 x14;
-  fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
   uint64_t x15;
   fiat_p256_uint1 x16;
-  fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
   uint64_t x17;
   fiat_p256_uint1 x18;
-  fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
-  uint64_t x19 = (x18 + x6);
+  uint64_t x19;
   uint64_t x20;
   uint64_t x21;
-  fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
   uint64_t x22;
   uint64_t x23;
-  fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
   uint64_t x24;
   uint64_t x25;
-  fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
   uint64_t x26;
   fiat_p256_uint1 x27;
-  fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
-  uint64_t x28 = (x27 + x23);
+  uint64_t x28;
   uint64_t x29;
   fiat_p256_uint1 x30;
-  fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
   uint64_t x31;
   fiat_p256_uint1 x32;
-  fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
   uint64_t x33;
   fiat_p256_uint1 x34;
-  fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
   uint64_t x35;
   fiat_p256_uint1 x36;
-  fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
   uint64_t x37;
   fiat_p256_uint1 x38;
-  fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
   uint64_t x39;
   uint64_t x40;
-  fiat_p256_mulx_u64(&x39, &x40, x1, (arg1[3]));
   uint64_t x41;
   uint64_t x42;
-  fiat_p256_mulx_u64(&x41, &x42, x1, (arg1[2]));
   uint64_t x43;
   uint64_t x44;
-  fiat_p256_mulx_u64(&x43, &x44, x1, (arg1[1]));
   uint64_t x45;
   uint64_t x46;
-  fiat_p256_mulx_u64(&x45, &x46, x1, (arg1[0]));
   uint64_t x47;
   fiat_p256_uint1 x48;
-  fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
   uint64_t x49;
   fiat_p256_uint1 x50;
-  fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
   uint64_t x51;
   fiat_p256_uint1 x52;
-  fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
-  uint64_t x53 = (x52 + x40);
+  uint64_t x53;
   uint64_t x54;
   fiat_p256_uint1 x55;
-  fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
   uint64_t x56;
   fiat_p256_uint1 x57;
-  fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
   uint64_t x58;
   fiat_p256_uint1 x59;
-  fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
   uint64_t x60;
   fiat_p256_uint1 x61;
-  fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
   uint64_t x62;
   fiat_p256_uint1 x63;
-  fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
   uint64_t x64;
   uint64_t x65;
-  fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
   uint64_t x66;
   uint64_t x67;
-  fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
   uint64_t x68;
   uint64_t x69;
-  fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
   uint64_t x70;
   fiat_p256_uint1 x71;
-  fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
-  uint64_t x72 = (x71 + x67);
+  uint64_t x72;
   uint64_t x73;
   fiat_p256_uint1 x74;
-  fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
   uint64_t x75;
   fiat_p256_uint1 x76;
-  fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
   uint64_t x77;
   fiat_p256_uint1 x78;
-  fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
   uint64_t x79;
   fiat_p256_uint1 x80;
-  fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
   uint64_t x81;
   fiat_p256_uint1 x82;
-  fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
-  uint64_t x83 = ((uint64_t)x82 + x63);
+  uint64_t x83;
   uint64_t x84;
   uint64_t x85;
-  fiat_p256_mulx_u64(&x84, &x85, x2, (arg1[3]));
   uint64_t x86;
   uint64_t x87;
-  fiat_p256_mulx_u64(&x86, &x87, x2, (arg1[2]));
   uint64_t x88;
   uint64_t x89;
-  fiat_p256_mulx_u64(&x88, &x89, x2, (arg1[1]));
   uint64_t x90;
   uint64_t x91;
-  fiat_p256_mulx_u64(&x90, &x91, x2, (arg1[0]));
   uint64_t x92;
   fiat_p256_uint1 x93;
-  fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
   uint64_t x94;
   fiat_p256_uint1 x95;
-  fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
   uint64_t x96;
   fiat_p256_uint1 x97;
-  fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
-  uint64_t x98 = (x97 + x85);
+  uint64_t x98;
   uint64_t x99;
   fiat_p256_uint1 x100;
-  fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
   uint64_t x101;
   fiat_p256_uint1 x102;
-  fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
   uint64_t x103;
   fiat_p256_uint1 x104;
-  fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
   uint64_t x105;
   fiat_p256_uint1 x106;
-  fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
   uint64_t x107;
   fiat_p256_uint1 x108;
-  fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
   uint64_t x109;
   uint64_t x110;
-  fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
   uint64_t x111;
   uint64_t x112;
-  fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
   uint64_t x113;
   uint64_t x114;
-  fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
   uint64_t x115;
   fiat_p256_uint1 x116;
-  fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
-  uint64_t x117 = (x116 + x112);
+  uint64_t x117;
   uint64_t x118;
   fiat_p256_uint1 x119;
-  fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
   uint64_t x120;
   fiat_p256_uint1 x121;
-  fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
   uint64_t x122;
   fiat_p256_uint1 x123;
-  fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
   uint64_t x124;
   fiat_p256_uint1 x125;
-  fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
   uint64_t x126;
   fiat_p256_uint1 x127;
-  fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
-  uint64_t x128 = ((uint64_t)x127 + x108);
+  uint64_t x128;
   uint64_t x129;
   uint64_t x130;
-  fiat_p256_mulx_u64(&x129, &x130, x3, (arg1[3]));
   uint64_t x131;
   uint64_t x132;
-  fiat_p256_mulx_u64(&x131, &x132, x3, (arg1[2]));
   uint64_t x133;
   uint64_t x134;
-  fiat_p256_mulx_u64(&x133, &x134, x3, (arg1[1]));
   uint64_t x135;
   uint64_t x136;
-  fiat_p256_mulx_u64(&x135, &x136, x3, (arg1[0]));
   uint64_t x137;
   fiat_p256_uint1 x138;
-  fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
   uint64_t x139;
   fiat_p256_uint1 x140;
-  fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
   uint64_t x141;
   fiat_p256_uint1 x142;
-  fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
-  uint64_t x143 = (x142 + x130);
+  uint64_t x143;
   uint64_t x144;
   fiat_p256_uint1 x145;
-  fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
   uint64_t x146;
   fiat_p256_uint1 x147;
-  fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
   uint64_t x148;
   fiat_p256_uint1 x149;
-  fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
   uint64_t x150;
   fiat_p256_uint1 x151;
-  fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
   uint64_t x152;
   fiat_p256_uint1 x153;
-  fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
   uint64_t x154;
   uint64_t x155;
-  fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
   uint64_t x156;
   uint64_t x157;
-  fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
   uint64_t x158;
   uint64_t x159;
-  fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
   uint64_t x160;
   fiat_p256_uint1 x161;
-  fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
-  uint64_t x162 = (x161 + x157);
+  uint64_t x162;
   uint64_t x163;
   fiat_p256_uint1 x164;
-  fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
   uint64_t x165;
   fiat_p256_uint1 x166;
-  fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
   uint64_t x167;
   fiat_p256_uint1 x168;
-  fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
   uint64_t x169;
   fiat_p256_uint1 x170;
-  fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
   uint64_t x171;
   fiat_p256_uint1 x172;
-  fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
-  uint64_t x173 = ((uint64_t)x172 + x153);
+  uint64_t x173;
   uint64_t x174;
   fiat_p256_uint1 x175;
-  fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
   uint64_t x176;
   fiat_p256_uint1 x177;
-  fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
   uint64_t x178;
   fiat_p256_uint1 x179;
-  fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
   uint64_t x180;
   fiat_p256_uint1 x181;
-  fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
   uint64_t x182;
   fiat_p256_uint1 x183;
-  fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
   uint64_t x184;
-  fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
   uint64_t x185;
-  fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
   uint64_t x186;
-  fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
   uint64_t x187;
+  x1 = (arg1[1]);
+  x2 = (arg1[2]);
+  x3 = (arg1[3]);
+  x4 = (arg1[0]);
+  fiat_p256_mulx_u64(&x5, &x6, x4, (arg1[3]));
+  fiat_p256_mulx_u64(&x7, &x8, x4, (arg1[2]));
+  fiat_p256_mulx_u64(&x9, &x10, x4, (arg1[1]));
+  fiat_p256_mulx_u64(&x11, &x12, x4, (arg1[0]));
+  fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
+  fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
+  fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
+  x19 = (x18 + x6);
+  fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
+  x28 = (x27 + x23);
+  fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
+  fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
+  fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
+  fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
+  fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
+  fiat_p256_mulx_u64(&x39, &x40, x1, (arg1[3]));
+  fiat_p256_mulx_u64(&x41, &x42, x1, (arg1[2]));
+  fiat_p256_mulx_u64(&x43, &x44, x1, (arg1[1]));
+  fiat_p256_mulx_u64(&x45, &x46, x1, (arg1[0]));
+  fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
+  fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
+  fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
+  x53 = (x52 + x40);
+  fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
+  fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
+  fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
+  fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
+  fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
+  fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
+  x72 = (x71 + x67);
+  fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
+  fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
+  fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
+  fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
+  fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
+  x83 = ((uint64_t)x82 + x63);
+  fiat_p256_mulx_u64(&x84, &x85, x2, (arg1[3]));
+  fiat_p256_mulx_u64(&x86, &x87, x2, (arg1[2]));
+  fiat_p256_mulx_u64(&x88, &x89, x2, (arg1[1]));
+  fiat_p256_mulx_u64(&x90, &x91, x2, (arg1[0]));
+  fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
+  fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
+  fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
+  x98 = (x97 + x85);
+  fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
+  fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
+  fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
+  fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
+  fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
+  fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
+  x117 = (x116 + x112);
+  fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
+  fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
+  fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
+  fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
+  fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
+  x128 = ((uint64_t)x127 + x108);
+  fiat_p256_mulx_u64(&x129, &x130, x3, (arg1[3]));
+  fiat_p256_mulx_u64(&x131, &x132, x3, (arg1[2]));
+  fiat_p256_mulx_u64(&x133, &x134, x3, (arg1[1]));
+  fiat_p256_mulx_u64(&x135, &x136, x3, (arg1[0]));
+  fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
+  fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
+  fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
+  x143 = (x142 + x130);
+  fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
+  fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
+  fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
+  fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
+  fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
+  fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
+  x162 = (x161 + x157);
+  fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
+  fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
+  fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
+  fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
+  fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
+  x173 = ((uint64_t)x172 + x153);
+  fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
+  fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
+  fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
+  fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
+  fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
+  fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
+  fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
   fiat_p256_cmovznz_u64(&x187, x183, x180, x171);
   out1[0] = x184;
   out1[1] = x185;
@@ -706,6 +770,7 @@
 
 /*
  * The function fiat_p256_add adds two field elements in the Montgomery domain.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  *   0 ≤ eval arg2 < m
@@ -713,47 +778,42 @@
  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
  *   0 ≤ eval out1 < m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
- *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
  */
-static void fiat_p256_add(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
+static FIAT_P256_FIAT_INLINE void fiat_p256_add(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
   uint64_t x1;
   fiat_p256_uint1 x2;
-  fiat_p256_addcarryx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
   uint64_t x3;
   fiat_p256_uint1 x4;
-  fiat_p256_addcarryx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
   uint64_t x5;
   fiat_p256_uint1 x6;
-  fiat_p256_addcarryx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
   uint64_t x7;
   fiat_p256_uint1 x8;
-  fiat_p256_addcarryx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
   uint64_t x9;
   fiat_p256_uint1 x10;
-  fiat_p256_subborrowx_u64(&x9, &x10, 0x0, x1, UINT64_C(0xffffffffffffffff));
   uint64_t x11;
   fiat_p256_uint1 x12;
-  fiat_p256_subborrowx_u64(&x11, &x12, x10, x3, UINT32_C(0xffffffff));
   uint64_t x13;
   fiat_p256_uint1 x14;
-  fiat_p256_subborrowx_u64(&x13, &x14, x12, x5, 0x0);
   uint64_t x15;
   fiat_p256_uint1 x16;
-  fiat_p256_subborrowx_u64(&x15, &x16, x14, x7, UINT64_C(0xffffffff00000001));
   uint64_t x17;
   fiat_p256_uint1 x18;
-  fiat_p256_subborrowx_u64(&x17, &x18, x16, x8, 0x0);
   uint64_t x19;
-  fiat_p256_cmovznz_u64(&x19, x18, x9, x1);
   uint64_t x20;
-  fiat_p256_cmovznz_u64(&x20, x18, x11, x3);
   uint64_t x21;
-  fiat_p256_cmovznz_u64(&x21, x18, x13, x5);
   uint64_t x22;
+  fiat_p256_addcarryx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
+  fiat_p256_addcarryx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
+  fiat_p256_addcarryx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
+  fiat_p256_addcarryx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
+  fiat_p256_subborrowx_u64(&x9, &x10, 0x0, x1, UINT64_C(0xffffffffffffffff));
+  fiat_p256_subborrowx_u64(&x11, &x12, x10, x3, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u64(&x13, &x14, x12, x5, 0x0);
+  fiat_p256_subborrowx_u64(&x15, &x16, x14, x7, UINT64_C(0xffffffff00000001));
+  fiat_p256_subborrowx_u64(&x17, &x18, x16, x8, 0x0);
+  fiat_p256_cmovznz_u64(&x19, x18, x9, x1);
+  fiat_p256_cmovznz_u64(&x20, x18, x11, x3);
+  fiat_p256_cmovznz_u64(&x21, x18, x13, x5);
   fiat_p256_cmovznz_u64(&x22, x18, x15, x7);
   out1[0] = x19;
   out1[1] = x20;
@@ -763,6 +823,7 @@
 
 /*
  * The function fiat_p256_sub subtracts two field elements in the Montgomery domain.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  *   0 ≤ eval arg2 < m
@@ -770,38 +831,33 @@
  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
  *   0 ≤ eval out1 < m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
- *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
  */
-static void fiat_p256_sub(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
+static FIAT_P256_FIAT_INLINE void fiat_p256_sub(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
   uint64_t x1;
   fiat_p256_uint1 x2;
-  fiat_p256_subborrowx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
   uint64_t x3;
   fiat_p256_uint1 x4;
-  fiat_p256_subborrowx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
   uint64_t x5;
   fiat_p256_uint1 x6;
-  fiat_p256_subborrowx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
   uint64_t x7;
   fiat_p256_uint1 x8;
-  fiat_p256_subborrowx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
   uint64_t x9;
-  fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
   uint64_t x10;
   fiat_p256_uint1 x11;
-  fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, (x9 & UINT64_C(0xffffffffffffffff)));
   uint64_t x12;
   fiat_p256_uint1 x13;
-  fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
   uint64_t x14;
   fiat_p256_uint1 x15;
-  fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
   uint64_t x16;
   fiat_p256_uint1 x17;
+  fiat_p256_subborrowx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
+  fiat_p256_subborrowx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
+  fiat_p256_subborrowx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
+  fiat_p256_subborrowx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
+  fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x9);
+  fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
+  fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
   fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001)));
   out1[0] = x10;
   out1[1] = x12;
@@ -811,43 +867,40 @@
 
 /*
  * The function fiat_p256_opp negates a field element in the Montgomery domain.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  * Postconditions:
  *   eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
  *   0 ≤ eval out1 < m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
  */
-static void fiat_p256_opp(uint64_t out1[4], const uint64_t arg1[4]) {
+static FIAT_P256_FIAT_INLINE void fiat_p256_opp(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) {
   uint64_t x1;
   fiat_p256_uint1 x2;
-  fiat_p256_subborrowx_u64(&x1, &x2, 0x0, 0x0, (arg1[0]));
   uint64_t x3;
   fiat_p256_uint1 x4;
-  fiat_p256_subborrowx_u64(&x3, &x4, x2, 0x0, (arg1[1]));
   uint64_t x5;
   fiat_p256_uint1 x6;
-  fiat_p256_subborrowx_u64(&x5, &x6, x4, 0x0, (arg1[2]));
   uint64_t x7;
   fiat_p256_uint1 x8;
-  fiat_p256_subborrowx_u64(&x7, &x8, x6, 0x0, (arg1[3]));
   uint64_t x9;
-  fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
   uint64_t x10;
   fiat_p256_uint1 x11;
-  fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, (x9 & UINT64_C(0xffffffffffffffff)));
   uint64_t x12;
   fiat_p256_uint1 x13;
-  fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
   uint64_t x14;
   fiat_p256_uint1 x15;
-  fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
   uint64_t x16;
   fiat_p256_uint1 x17;
+  fiat_p256_subborrowx_u64(&x1, &x2, 0x0, 0x0, (arg1[0]));
+  fiat_p256_subborrowx_u64(&x3, &x4, x2, 0x0, (arg1[1]));
+  fiat_p256_subborrowx_u64(&x5, &x6, x4, 0x0, (arg1[2]));
+  fiat_p256_subborrowx_u64(&x7, &x8, x6, 0x0, (arg1[3]));
+  fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x9);
+  fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
+  fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
   fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001)));
   out1[0] = x10;
   out1[1] = x12;
@@ -857,153 +910,152 @@
 
 /*
  * The function fiat_p256_from_montgomery translates a field element out of the Montgomery domain.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  * Postconditions:
  *   eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^4) mod m
  *   0 ≤ eval out1 < m
  *
- * Input Bounds:
- *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
- * Output Bounds:
- *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
  */
-static void fiat_p256_from_montgomery(uint64_t out1[4], const uint64_t arg1[4]) {
-  uint64_t x1 = (arg1[0]);
+static FIAT_P256_FIAT_INLINE void fiat_p256_from_montgomery(fiat_p256_non_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) {
+  uint64_t x1;
   uint64_t x2;
   uint64_t x3;
-  fiat_p256_mulx_u64(&x2, &x3, x1, UINT64_C(0xffffffff00000001));
   uint64_t x4;
   uint64_t x5;
-  fiat_p256_mulx_u64(&x4, &x5, x1, UINT32_C(0xffffffff));
   uint64_t x6;
   uint64_t x7;
-  fiat_p256_mulx_u64(&x6, &x7, x1, UINT64_C(0xffffffffffffffff));
   uint64_t x8;
   fiat_p256_uint1 x9;
-  fiat_p256_addcarryx_u64(&x8, &x9, 0x0, x7, x4);
   uint64_t x10;
   fiat_p256_uint1 x11;
-  fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x6);
   uint64_t x12;
   fiat_p256_uint1 x13;
-  fiat_p256_addcarryx_u64(&x12, &x13, x11, 0x0, x8);
   uint64_t x14;
   fiat_p256_uint1 x15;
-  fiat_p256_addcarryx_u64(&x14, &x15, 0x0, x12, (arg1[1]));
   uint64_t x16;
   uint64_t x17;
-  fiat_p256_mulx_u64(&x16, &x17, x14, UINT64_C(0xffffffff00000001));
   uint64_t x18;
   uint64_t x19;
-  fiat_p256_mulx_u64(&x18, &x19, x14, UINT32_C(0xffffffff));
   uint64_t x20;
   uint64_t x21;
-  fiat_p256_mulx_u64(&x20, &x21, x14, UINT64_C(0xffffffffffffffff));
   uint64_t x22;
   fiat_p256_uint1 x23;
-  fiat_p256_addcarryx_u64(&x22, &x23, 0x0, x21, x18);
   uint64_t x24;
   fiat_p256_uint1 x25;
-  fiat_p256_addcarryx_u64(&x24, &x25, 0x0, x14, x20);
   uint64_t x26;
   fiat_p256_uint1 x27;
-  fiat_p256_addcarryx_u64(&x26, &x27, x25, (x15 + (x13 + (x9 + x5))), x22);
   uint64_t x28;
   fiat_p256_uint1 x29;
-  fiat_p256_addcarryx_u64(&x28, &x29, x27, x2, (x23 + x19));
   uint64_t x30;
   fiat_p256_uint1 x31;
-  fiat_p256_addcarryx_u64(&x30, &x31, x29, x3, x16);
   uint64_t x32;
   fiat_p256_uint1 x33;
-  fiat_p256_addcarryx_u64(&x32, &x33, 0x0, x26, (arg1[2]));
   uint64_t x34;
   fiat_p256_uint1 x35;
-  fiat_p256_addcarryx_u64(&x34, &x35, x33, x28, 0x0);
   uint64_t x36;
   fiat_p256_uint1 x37;
-  fiat_p256_addcarryx_u64(&x36, &x37, x35, x30, 0x0);
   uint64_t x38;
   uint64_t x39;
-  fiat_p256_mulx_u64(&x38, &x39, x32, UINT64_C(0xffffffff00000001));
   uint64_t x40;
   uint64_t x41;
-  fiat_p256_mulx_u64(&x40, &x41, x32, UINT32_C(0xffffffff));
   uint64_t x42;
   uint64_t x43;
-  fiat_p256_mulx_u64(&x42, &x43, x32, UINT64_C(0xffffffffffffffff));
   uint64_t x44;
   fiat_p256_uint1 x45;
-  fiat_p256_addcarryx_u64(&x44, &x45, 0x0, x43, x40);
   uint64_t x46;
   fiat_p256_uint1 x47;
-  fiat_p256_addcarryx_u64(&x46, &x47, 0x0, x32, x42);
   uint64_t x48;
   fiat_p256_uint1 x49;
-  fiat_p256_addcarryx_u64(&x48, &x49, x47, x34, x44);
   uint64_t x50;
   fiat_p256_uint1 x51;
-  fiat_p256_addcarryx_u64(&x50, &x51, x49, x36, (x45 + x41));
   uint64_t x52;
   fiat_p256_uint1 x53;
-  fiat_p256_addcarryx_u64(&x52, &x53, x51, (x37 + (x31 + x17)), x38);
   uint64_t x54;
   fiat_p256_uint1 x55;
-  fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x48, (arg1[3]));
   uint64_t x56;
   fiat_p256_uint1 x57;
-  fiat_p256_addcarryx_u64(&x56, &x57, x55, x50, 0x0);
   uint64_t x58;
   fiat_p256_uint1 x59;
-  fiat_p256_addcarryx_u64(&x58, &x59, x57, x52, 0x0);
   uint64_t x60;
   uint64_t x61;
-  fiat_p256_mulx_u64(&x60, &x61, x54, UINT64_C(0xffffffff00000001));
   uint64_t x62;
   uint64_t x63;
-  fiat_p256_mulx_u64(&x62, &x63, x54, UINT32_C(0xffffffff));
   uint64_t x64;
   uint64_t x65;
-  fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffffffffffff));
   uint64_t x66;
   fiat_p256_uint1 x67;
-  fiat_p256_addcarryx_u64(&x66, &x67, 0x0, x65, x62);
   uint64_t x68;
   fiat_p256_uint1 x69;
-  fiat_p256_addcarryx_u64(&x68, &x69, 0x0, x54, x64);
   uint64_t x70;
   fiat_p256_uint1 x71;
-  fiat_p256_addcarryx_u64(&x70, &x71, x69, x56, x66);
   uint64_t x72;
   fiat_p256_uint1 x73;
-  fiat_p256_addcarryx_u64(&x72, &x73, x71, x58, (x67 + x63));
   uint64_t x74;
   fiat_p256_uint1 x75;
-  fiat_p256_addcarryx_u64(&x74, &x75, x73, (x59 + (x53 + x39)), x60);
-  uint64_t x76 = (x75 + x61);
+  uint64_t x76;
   uint64_t x77;
   fiat_p256_uint1 x78;
-  fiat_p256_subborrowx_u64(&x77, &x78, 0x0, x70, UINT64_C(0xffffffffffffffff));
   uint64_t x79;
   fiat_p256_uint1 x80;
-  fiat_p256_subborrowx_u64(&x79, &x80, x78, x72, UINT32_C(0xffffffff));
   uint64_t x81;
   fiat_p256_uint1 x82;
-  fiat_p256_subborrowx_u64(&x81, &x82, x80, x74, 0x0);
   uint64_t x83;
   fiat_p256_uint1 x84;
-  fiat_p256_subborrowx_u64(&x83, &x84, x82, x76, UINT64_C(0xffffffff00000001));
   uint64_t x85;
   fiat_p256_uint1 x86;
-  fiat_p256_subborrowx_u64(&x85, &x86, x84, 0x0, 0x0);
   uint64_t x87;
-  fiat_p256_cmovznz_u64(&x87, x86, x77, x70);
   uint64_t x88;
-  fiat_p256_cmovznz_u64(&x88, x86, x79, x72);
   uint64_t x89;
-  fiat_p256_cmovznz_u64(&x89, x86, x81, x74);
   uint64_t x90;
+  x1 = (arg1[0]);
+  fiat_p256_mulx_u64(&x2, &x3, x1, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x4, &x5, x1, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x6, &x7, x1, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x8, &x9, 0x0, x7, x4);
+  fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x6);
+  fiat_p256_addcarryx_u64(&x12, &x13, x11, 0x0, x8);
+  fiat_p256_addcarryx_u64(&x14, &x15, 0x0, x12, (arg1[1]));
+  fiat_p256_mulx_u64(&x16, &x17, x14, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x18, &x19, x14, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x20, &x21, x14, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x22, &x23, 0x0, x21, x18);
+  fiat_p256_addcarryx_u64(&x24, &x25, 0x0, x14, x20);
+  fiat_p256_addcarryx_u64(&x26, &x27, x25, (x15 + (x13 + (x9 + x5))), x22);
+  fiat_p256_addcarryx_u64(&x28, &x29, x27, x2, (x23 + x19));
+  fiat_p256_addcarryx_u64(&x30, &x31, x29, x3, x16);
+  fiat_p256_addcarryx_u64(&x32, &x33, 0x0, x26, (arg1[2]));
+  fiat_p256_addcarryx_u64(&x34, &x35, x33, x28, 0x0);
+  fiat_p256_addcarryx_u64(&x36, &x37, x35, x30, 0x0);
+  fiat_p256_mulx_u64(&x38, &x39, x32, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x40, &x41, x32, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x42, &x43, x32, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x44, &x45, 0x0, x43, x40);
+  fiat_p256_addcarryx_u64(&x46, &x47, 0x0, x32, x42);
+  fiat_p256_addcarryx_u64(&x48, &x49, x47, x34, x44);
+  fiat_p256_addcarryx_u64(&x50, &x51, x49, x36, (x45 + x41));
+  fiat_p256_addcarryx_u64(&x52, &x53, x51, (x37 + (x31 + x17)), x38);
+  fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x48, (arg1[3]));
+  fiat_p256_addcarryx_u64(&x56, &x57, x55, x50, 0x0);
+  fiat_p256_addcarryx_u64(&x58, &x59, x57, x52, 0x0);
+  fiat_p256_mulx_u64(&x60, &x61, x54, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x62, &x63, x54, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x66, &x67, 0x0, x65, x62);
+  fiat_p256_addcarryx_u64(&x68, &x69, 0x0, x54, x64);
+  fiat_p256_addcarryx_u64(&x70, &x71, x69, x56, x66);
+  fiat_p256_addcarryx_u64(&x72, &x73, x71, x58, (x67 + x63));
+  fiat_p256_addcarryx_u64(&x74, &x75, x73, (x59 + (x53 + x39)), x60);
+  x76 = (x75 + x61);
+  fiat_p256_subborrowx_u64(&x77, &x78, 0x0, x70, UINT64_C(0xffffffffffffffff));
+  fiat_p256_subborrowx_u64(&x79, &x80, x78, x72, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u64(&x81, &x82, x80, x74, 0x0);
+  fiat_p256_subborrowx_u64(&x83, &x84, x82, x76, UINT64_C(0xffffffff00000001));
+  fiat_p256_subborrowx_u64(&x85, &x86, x84, 0x0, 0x0);
+  fiat_p256_cmovznz_u64(&x87, x86, x77, x70);
+  fiat_p256_cmovznz_u64(&x88, x86, x79, x72);
+  fiat_p256_cmovznz_u64(&x89, x86, x81, x74);
   fiat_p256_cmovznz_u64(&x90, x86, x83, x76);
   out1[0] = x87;
   out1[1] = x88;
@@ -1012,7 +1064,284 @@
 }
 
 /*
+ * The function fiat_p256_to_montgomery translates a field element into the Montgomery domain.
+ *
+ * Preconditions:
+ *   0 ≤ eval arg1 < m
+ * Postconditions:
+ *   eval (from_montgomery out1) mod m = eval arg1 mod m
+ *   0 ≤ eval out1 < m
+ *
+ */
+static FIAT_P256_FIAT_INLINE void fiat_p256_to_montgomery(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_non_montgomery_domain_field_element arg1) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
+  uint64_t x5;
+  uint64_t x6;
+  uint64_t x7;
+  uint64_t x8;
+  uint64_t x9;
+  uint64_t x10;
+  uint64_t x11;
+  uint64_t x12;
+  uint64_t x13;
+  fiat_p256_uint1 x14;
+  uint64_t x15;
+  fiat_p256_uint1 x16;
+  uint64_t x17;
+  fiat_p256_uint1 x18;
+  uint64_t x19;
+  uint64_t x20;
+  uint64_t x21;
+  uint64_t x22;
+  uint64_t x23;
+  uint64_t x24;
+  uint64_t x25;
+  fiat_p256_uint1 x26;
+  uint64_t x27;
+  fiat_p256_uint1 x28;
+  uint64_t x29;
+  fiat_p256_uint1 x30;
+  uint64_t x31;
+  fiat_p256_uint1 x32;
+  uint64_t x33;
+  fiat_p256_uint1 x34;
+  uint64_t x35;
+  fiat_p256_uint1 x36;
+  uint64_t x37;
+  uint64_t x38;
+  uint64_t x39;
+  uint64_t x40;
+  uint64_t x41;
+  uint64_t x42;
+  uint64_t x43;
+  uint64_t x44;
+  uint64_t x45;
+  fiat_p256_uint1 x46;
+  uint64_t x47;
+  fiat_p256_uint1 x48;
+  uint64_t x49;
+  fiat_p256_uint1 x50;
+  uint64_t x51;
+  fiat_p256_uint1 x52;
+  uint64_t x53;
+  fiat_p256_uint1 x54;
+  uint64_t x55;
+  fiat_p256_uint1 x56;
+  uint64_t x57;
+  fiat_p256_uint1 x58;
+  uint64_t x59;
+  uint64_t x60;
+  uint64_t x61;
+  uint64_t x62;
+  uint64_t x63;
+  uint64_t x64;
+  uint64_t x65;
+  fiat_p256_uint1 x66;
+  uint64_t x67;
+  fiat_p256_uint1 x68;
+  uint64_t x69;
+  fiat_p256_uint1 x70;
+  uint64_t x71;
+  fiat_p256_uint1 x72;
+  uint64_t x73;
+  fiat_p256_uint1 x74;
+  uint64_t x75;
+  fiat_p256_uint1 x76;
+  uint64_t x77;
+  uint64_t x78;
+  uint64_t x79;
+  uint64_t x80;
+  uint64_t x81;
+  uint64_t x82;
+  uint64_t x83;
+  uint64_t x84;
+  uint64_t x85;
+  fiat_p256_uint1 x86;
+  uint64_t x87;
+  fiat_p256_uint1 x88;
+  uint64_t x89;
+  fiat_p256_uint1 x90;
+  uint64_t x91;
+  fiat_p256_uint1 x92;
+  uint64_t x93;
+  fiat_p256_uint1 x94;
+  uint64_t x95;
+  fiat_p256_uint1 x96;
+  uint64_t x97;
+  fiat_p256_uint1 x98;
+  uint64_t x99;
+  uint64_t x100;
+  uint64_t x101;
+  uint64_t x102;
+  uint64_t x103;
+  uint64_t x104;
+  uint64_t x105;
+  fiat_p256_uint1 x106;
+  uint64_t x107;
+  fiat_p256_uint1 x108;
+  uint64_t x109;
+  fiat_p256_uint1 x110;
+  uint64_t x111;
+  fiat_p256_uint1 x112;
+  uint64_t x113;
+  fiat_p256_uint1 x114;
+  uint64_t x115;
+  fiat_p256_uint1 x116;
+  uint64_t x117;
+  uint64_t x118;
+  uint64_t x119;
+  uint64_t x120;
+  uint64_t x121;
+  uint64_t x122;
+  uint64_t x123;
+  uint64_t x124;
+  uint64_t x125;
+  fiat_p256_uint1 x126;
+  uint64_t x127;
+  fiat_p256_uint1 x128;
+  uint64_t x129;
+  fiat_p256_uint1 x130;
+  uint64_t x131;
+  fiat_p256_uint1 x132;
+  uint64_t x133;
+  fiat_p256_uint1 x134;
+  uint64_t x135;
+  fiat_p256_uint1 x136;
+  uint64_t x137;
+  fiat_p256_uint1 x138;
+  uint64_t x139;
+  uint64_t x140;
+  uint64_t x141;
+  uint64_t x142;
+  uint64_t x143;
+  uint64_t x144;
+  uint64_t x145;
+  fiat_p256_uint1 x146;
+  uint64_t x147;
+  fiat_p256_uint1 x148;
+  uint64_t x149;
+  fiat_p256_uint1 x150;
+  uint64_t x151;
+  fiat_p256_uint1 x152;
+  uint64_t x153;
+  fiat_p256_uint1 x154;
+  uint64_t x155;
+  fiat_p256_uint1 x156;
+  uint64_t x157;
+  fiat_p256_uint1 x158;
+  uint64_t x159;
+  fiat_p256_uint1 x160;
+  uint64_t x161;
+  fiat_p256_uint1 x162;
+  uint64_t x163;
+  fiat_p256_uint1 x164;
+  uint64_t x165;
+  fiat_p256_uint1 x166;
+  uint64_t x167;
+  uint64_t x168;
+  uint64_t x169;
+  uint64_t x170;
+  x1 = (arg1[1]); /* limbs 1..3 are held in x1..x3 for rounds 2..4; limb 0 (x4) is consumed first */
+  x2 = (arg1[2]);
+  x3 = (arg1[3]);
+  x4 = (arg1[0]);
+  fiat_p256_mulx_u64(&x5, &x6, x4, UINT64_C(0x4fffffffd)); /* round 1: limb 0 times a fixed 4-limb constant (0x3 is its lowest limb); presumably 2^512 mod m, consistent with the postcondition -- confirm */
+  fiat_p256_mulx_u64(&x7, &x8, x4, UINT64_C(0xfffffffffffffffe));
+  fiat_p256_mulx_u64(&x9, &x10, x4, UINT64_C(0xfffffffbffffffff));
+  fiat_p256_mulx_u64(&x11, &x12, x4, 0x3);
+  fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9); /* join adjacent partial products; assumes mulx writes (low, high) halves -- confirm against its definition */
+  fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
+  fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
+  fiat_p256_mulx_u64(&x19, &x20, x11, UINT64_C(0xffffffff00000001)); /* reduction: lowest word x11 times the nonzero limbs of m (the same limbs subtracted at the end) */
+  fiat_p256_mulx_u64(&x21, &x22, x11, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x23, &x24, x11, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x25, &x26, 0x0, x24, x21);
+  fiat_p256_addcarryx_u64(&x27, &x28, 0x0, x11, x23); /* x27 is never read afterwards; only the carry x28 is propagated */
+  fiat_p256_addcarryx_u64(&x29, &x30, x28, x13, x25);
+  fiat_p256_addcarryx_u64(&x31, &x32, x30, x15, (x26 + x22));
+  fiat_p256_addcarryx_u64(&x33, &x34, x32, x17, x19);
+  fiat_p256_addcarryx_u64(&x35, &x36, x34, (x18 + x6), x20);
+  fiat_p256_mulx_u64(&x37, &x38, x1, UINT64_C(0x4fffffffd)); /* round 2: same multiply/accumulate/reduce pattern for limb 1 (x1) */
+  fiat_p256_mulx_u64(&x39, &x40, x1, UINT64_C(0xfffffffffffffffe));
+  fiat_p256_mulx_u64(&x41, &x42, x1, UINT64_C(0xfffffffbffffffff));
+  fiat_p256_mulx_u64(&x43, &x44, x1, 0x3);
+  fiat_p256_addcarryx_u64(&x45, &x46, 0x0, x44, x41);
+  fiat_p256_addcarryx_u64(&x47, &x48, x46, x42, x39);
+  fiat_p256_addcarryx_u64(&x49, &x50, x48, x40, x37);
+  fiat_p256_addcarryx_u64(&x51, &x52, 0x0, x29, x43); /* add this round's product onto the running value from the previous round */
+  fiat_p256_addcarryx_u64(&x53, &x54, x52, x31, x45);
+  fiat_p256_addcarryx_u64(&x55, &x56, x54, x33, x47);
+  fiat_p256_addcarryx_u64(&x57, &x58, x56, x35, x49);
+  fiat_p256_mulx_u64(&x59, &x60, x51, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x61, &x62, x51, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x63, &x64, x51, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x65, &x66, 0x0, x64, x61);
+  fiat_p256_addcarryx_u64(&x67, &x68, 0x0, x51, x63);
+  fiat_p256_addcarryx_u64(&x69, &x70, x68, x53, x65);
+  fiat_p256_addcarryx_u64(&x71, &x72, x70, x55, (x66 + x62));
+  fiat_p256_addcarryx_u64(&x73, &x74, x72, x57, x59);
+  fiat_p256_addcarryx_u64(&x75, &x76, x74, (((uint64_t)x58 + x36) + (x50 + x38)), x60);
+  fiat_p256_mulx_u64(&x77, &x78, x2, UINT64_C(0x4fffffffd)); /* round 3: limb 2 (x2) */
+  fiat_p256_mulx_u64(&x79, &x80, x2, UINT64_C(0xfffffffffffffffe));
+  fiat_p256_mulx_u64(&x81, &x82, x2, UINT64_C(0xfffffffbffffffff));
+  fiat_p256_mulx_u64(&x83, &x84, x2, 0x3);
+  fiat_p256_addcarryx_u64(&x85, &x86, 0x0, x84, x81);
+  fiat_p256_addcarryx_u64(&x87, &x88, x86, x82, x79);
+  fiat_p256_addcarryx_u64(&x89, &x90, x88, x80, x77);
+  fiat_p256_addcarryx_u64(&x91, &x92, 0x0, x69, x83);
+  fiat_p256_addcarryx_u64(&x93, &x94, x92, x71, x85);
+  fiat_p256_addcarryx_u64(&x95, &x96, x94, x73, x87);
+  fiat_p256_addcarryx_u64(&x97, &x98, x96, x75, x89);
+  fiat_p256_mulx_u64(&x99, &x100, x91, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x101, &x102, x91, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x103, &x104, x91, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x105, &x106, 0x0, x104, x101);
+  fiat_p256_addcarryx_u64(&x107, &x108, 0x0, x91, x103);
+  fiat_p256_addcarryx_u64(&x109, &x110, x108, x93, x105);
+  fiat_p256_addcarryx_u64(&x111, &x112, x110, x95, (x106 + x102));
+  fiat_p256_addcarryx_u64(&x113, &x114, x112, x97, x99);
+  fiat_p256_addcarryx_u64(&x115, &x116, x114, (((uint64_t)x98 + x76) + (x90 + x78)), x100);
+  fiat_p256_mulx_u64(&x117, &x118, x3, UINT64_C(0x4fffffffd)); /* round 4: limb 3 (x3) */
+  fiat_p256_mulx_u64(&x119, &x120, x3, UINT64_C(0xfffffffffffffffe));
+  fiat_p256_mulx_u64(&x121, &x122, x3, UINT64_C(0xfffffffbffffffff));
+  fiat_p256_mulx_u64(&x123, &x124, x3, 0x3);
+  fiat_p256_addcarryx_u64(&x125, &x126, 0x0, x124, x121);
+  fiat_p256_addcarryx_u64(&x127, &x128, x126, x122, x119);
+  fiat_p256_addcarryx_u64(&x129, &x130, x128, x120, x117);
+  fiat_p256_addcarryx_u64(&x131, &x132, 0x0, x109, x123);
+  fiat_p256_addcarryx_u64(&x133, &x134, x132, x111, x125);
+  fiat_p256_addcarryx_u64(&x135, &x136, x134, x113, x127);
+  fiat_p256_addcarryx_u64(&x137, &x138, x136, x115, x129);
+  fiat_p256_mulx_u64(&x139, &x140, x131, UINT64_C(0xffffffff00000001));
+  fiat_p256_mulx_u64(&x141, &x142, x131, UINT32_C(0xffffffff));
+  fiat_p256_mulx_u64(&x143, &x144, x131, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x145, &x146, 0x0, x144, x141);
+  fiat_p256_addcarryx_u64(&x147, &x148, 0x0, x131, x143);
+  fiat_p256_addcarryx_u64(&x149, &x150, x148, x133, x145);
+  fiat_p256_addcarryx_u64(&x151, &x152, x150, x135, (x146 + x142));
+  fiat_p256_addcarryx_u64(&x153, &x154, x152, x137, x139);
+  fiat_p256_addcarryx_u64(&x155, &x156, x154, (((uint64_t)x138 + x116) + (x130 + x118)), x140);
+  fiat_p256_subborrowx_u64(&x157, &x158, 0x0, x149, UINT64_C(0xffffffffffffffff)); /* trial subtraction of the limbs of m, least-significant limb first */
+  fiat_p256_subborrowx_u64(&x159, &x160, x158, x151, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u64(&x161, &x162, x160, x153, 0x0);
+  fiat_p256_subborrowx_u64(&x163, &x164, x162, x155, UINT64_C(0xffffffff00000001));
+  fiat_p256_subborrowx_u64(&x165, &x166, x164, x156, 0x0); /* last step takes in the top carry x156; x166 is the resulting borrow */
+  fiat_p256_cmovznz_u64(&x167, x166, x157, x149); /* x166 == 0 selects the subtracted limb, otherwise the unsubtracted one is kept */
+  fiat_p256_cmovznz_u64(&x168, x166, x159, x151);
+  fiat_p256_cmovznz_u64(&x169, x166, x161, x153);
+  fiat_p256_cmovznz_u64(&x170, x166, x163, x155);
+  out1[0] = x167;
+  out1[1] = x168;
+  out1[2] = x169;
+  out1[3] = x170;
+}
+
+/*
  * The function fiat_p256_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  * Postconditions:
@@ -1023,13 +1352,15 @@
  * Output Bounds:
  *   out1: [0x0 ~> 0xffffffffffffffff]
  */
-static void fiat_p256_nonzero(uint64_t* out1, const uint64_t arg1[4]) {
-  uint64_t x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | ((arg1[3]) | (uint64_t)0x0))));
+static FIAT_P256_FIAT_INLINE void fiat_p256_nonzero(uint64_t* out1, const uint64_t arg1[4]) {
+  uint64_t x1;
+  x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | (arg1[3])))); /* OR of all four limbs: zero exactly when every limb is zero */
   *out1 = x1;
 }
 
 /*
  * The function fiat_p256_selectznz is a multi-limb conditional select.
+ *
  * Postconditions:
  *   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
  *
@@ -1040,14 +1371,14 @@
  * Output Bounds:
  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
  */
-static void fiat_p256_selectznz(uint64_t out1[4], fiat_p256_uint1 arg1, const uint64_t arg2[4], const uint64_t arg3[4]) {
+static FIAT_P256_FIAT_INLINE void fiat_p256_selectznz(uint64_t out1[4], fiat_p256_uint1 arg1, const uint64_t arg2[4], const uint64_t arg3[4]) {
   uint64_t x1;
-  fiat_p256_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0]));
   uint64_t x2;
-  fiat_p256_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1]));
   uint64_t x3;
-  fiat_p256_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2]));
   uint64_t x4;
+  fiat_p256_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0]));
+  fiat_p256_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1]));
+  fiat_p256_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2]));
   fiat_p256_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3]));
   out1[0] = x1;
   out1[1] = x2;
@@ -1056,7 +1387,8 @@
 }
 
 /*
- * The function fiat_p256_to_bytes serializes a field element in the Montgomery domain to bytes in little-endian order.
+ * The function fiat_p256_to_bytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order.
+ *
  * Preconditions:
  *   0 ≤ eval arg1 < m
  * Postconditions:
@@ -1067,106 +1399,164 @@
  * Output Bounds:
  *   out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
  */
-static void fiat_p256_to_bytes(uint8_t out1[32], const uint64_t arg1[4]) {
-  uint64_t x1 = (arg1[3]);
-  uint64_t x2 = (arg1[2]);
-  uint64_t x3 = (arg1[1]);
-  uint64_t x4 = (arg1[0]);
-  uint64_t x5 = (x4 >> 8);
-  uint8_t x6 = (uint8_t)(x4 & UINT8_C(0xff));
-  uint64_t x7 = (x5 >> 8);
-  uint8_t x8 = (uint8_t)(x5 & UINT8_C(0xff));
-  uint64_t x9 = (x7 >> 8);
-  uint8_t x10 = (uint8_t)(x7 & UINT8_C(0xff));
-  uint64_t x11 = (x9 >> 8);
-  uint8_t x12 = (uint8_t)(x9 & UINT8_C(0xff));
-  uint64_t x13 = (x11 >> 8);
-  uint8_t x14 = (uint8_t)(x11 & UINT8_C(0xff));
-  uint64_t x15 = (x13 >> 8);
-  uint8_t x16 = (uint8_t)(x13 & UINT8_C(0xff));
-  uint8_t x17 = (uint8_t)(x15 >> 8);
-  uint8_t x18 = (uint8_t)(x15 & UINT8_C(0xff));
-  uint8_t x19 = (uint8_t)(x17 & UINT8_C(0xff));
-  uint64_t x20 = (x3 >> 8);
-  uint8_t x21 = (uint8_t)(x3 & UINT8_C(0xff));
-  uint64_t x22 = (x20 >> 8);
-  uint8_t x23 = (uint8_t)(x20 & UINT8_C(0xff));
-  uint64_t x24 = (x22 >> 8);
-  uint8_t x25 = (uint8_t)(x22 & UINT8_C(0xff));
-  uint64_t x26 = (x24 >> 8);
-  uint8_t x27 = (uint8_t)(x24 & UINT8_C(0xff));
-  uint64_t x28 = (x26 >> 8);
-  uint8_t x29 = (uint8_t)(x26 & UINT8_C(0xff));
-  uint64_t x30 = (x28 >> 8);
-  uint8_t x31 = (uint8_t)(x28 & UINT8_C(0xff));
-  uint8_t x32 = (uint8_t)(x30 >> 8);
-  uint8_t x33 = (uint8_t)(x30 & UINT8_C(0xff));
-  uint8_t x34 = (uint8_t)(x32 & UINT8_C(0xff));
-  uint64_t x35 = (x2 >> 8);
-  uint8_t x36 = (uint8_t)(x2 & UINT8_C(0xff));
-  uint64_t x37 = (x35 >> 8);
-  uint8_t x38 = (uint8_t)(x35 & UINT8_C(0xff));
-  uint64_t x39 = (x37 >> 8);
-  uint8_t x40 = (uint8_t)(x37 & UINT8_C(0xff));
-  uint64_t x41 = (x39 >> 8);
-  uint8_t x42 = (uint8_t)(x39 & UINT8_C(0xff));
-  uint64_t x43 = (x41 >> 8);
-  uint8_t x44 = (uint8_t)(x41 & UINT8_C(0xff));
-  uint64_t x45 = (x43 >> 8);
-  uint8_t x46 = (uint8_t)(x43 & UINT8_C(0xff));
-  uint8_t x47 = (uint8_t)(x45 >> 8);
-  uint8_t x48 = (uint8_t)(x45 & UINT8_C(0xff));
-  uint8_t x49 = (uint8_t)(x47 & UINT8_C(0xff));
-  uint64_t x50 = (x1 >> 8);
-  uint8_t x51 = (uint8_t)(x1 & UINT8_C(0xff));
-  uint64_t x52 = (x50 >> 8);
-  uint8_t x53 = (uint8_t)(x50 & UINT8_C(0xff));
-  uint64_t x54 = (x52 >> 8);
-  uint8_t x55 = (uint8_t)(x52 & UINT8_C(0xff));
-  uint64_t x56 = (x54 >> 8);
-  uint8_t x57 = (uint8_t)(x54 & UINT8_C(0xff));
-  uint64_t x58 = (x56 >> 8);
-  uint8_t x59 = (uint8_t)(x56 & UINT8_C(0xff));
-  uint64_t x60 = (x58 >> 8);
-  uint8_t x61 = (uint8_t)(x58 & UINT8_C(0xff));
-  uint8_t x62 = (uint8_t)(x60 >> 8);
-  uint8_t x63 = (uint8_t)(x60 & UINT8_C(0xff));
-  out1[0] = x6;
-  out1[1] = x8;
-  out1[2] = x10;
-  out1[3] = x12;
-  out1[4] = x14;
-  out1[5] = x16;
-  out1[6] = x18;
-  out1[7] = x19;
-  out1[8] = x21;
-  out1[9] = x23;
-  out1[10] = x25;
-  out1[11] = x27;
-  out1[12] = x29;
-  out1[13] = x31;
-  out1[14] = x33;
-  out1[15] = x34;
-  out1[16] = x36;
-  out1[17] = x38;
-  out1[18] = x40;
-  out1[19] = x42;
-  out1[20] = x44;
-  out1[21] = x46;
-  out1[22] = x48;
-  out1[23] = x49;
-  out1[24] = x51;
-  out1[25] = x53;
-  out1[26] = x55;
-  out1[27] = x57;
-  out1[28] = x59;
-  out1[29] = x61;
-  out1[30] = x63;
-  out1[31] = x62;
+static FIAT_P256_FIAT_INLINE void fiat_p256_to_bytes(uint8_t out1[32], const uint64_t arg1[4]) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
+  uint8_t x5;
+  uint64_t x6;
+  uint8_t x7;
+  uint64_t x8;
+  uint8_t x9;
+  uint64_t x10;
+  uint8_t x11;
+  uint64_t x12;
+  uint8_t x13;
+  uint64_t x14;
+  uint8_t x15;
+  uint64_t x16;
+  uint8_t x17;
+  uint8_t x18;
+  uint8_t x19;
+  uint64_t x20;
+  uint8_t x21;
+  uint64_t x22;
+  uint8_t x23;
+  uint64_t x24;
+  uint8_t x25;
+  uint64_t x26;
+  uint8_t x27;
+  uint64_t x28;
+  uint8_t x29;
+  uint64_t x30;
+  uint8_t x31;
+  uint8_t x32;
+  uint8_t x33;
+  uint64_t x34;
+  uint8_t x35;
+  uint64_t x36;
+  uint8_t x37;
+  uint64_t x38;
+  uint8_t x39;
+  uint64_t x40;
+  uint8_t x41;
+  uint64_t x42;
+  uint8_t x43;
+  uint64_t x44;
+  uint8_t x45;
+  uint8_t x46;
+  uint8_t x47;
+  uint64_t x48;
+  uint8_t x49;
+  uint64_t x50;
+  uint8_t x51;
+  uint64_t x52;
+  uint8_t x53;
+  uint64_t x54;
+  uint8_t x55;
+  uint64_t x56;
+  uint8_t x57;
+  uint64_t x58;
+  uint8_t x59;
+  uint8_t x60;
+  x1 = (arg1[3]); /* limbs are copied out in reverse order: x4 = limb 0 ... x1 = limb 3 */
+  x2 = (arg1[2]);
+  x3 = (arg1[1]);
+  x4 = (arg1[0]);
+  x5 = (uint8_t)(x4 & UINT8_C(0xff)); /* limb 0: take the current low byte ... */
+  x6 = (x4 >> 8); /* ... then shift it away; the pair repeats for each following byte */
+  x7 = (uint8_t)(x6 & UINT8_C(0xff));
+  x8 = (x6 >> 8);
+  x9 = (uint8_t)(x8 & UINT8_C(0xff));
+  x10 = (x8 >> 8);
+  x11 = (uint8_t)(x10 & UINT8_C(0xff));
+  x12 = (x10 >> 8);
+  x13 = (uint8_t)(x12 & UINT8_C(0xff));
+  x14 = (x12 >> 8);
+  x15 = (uint8_t)(x14 & UINT8_C(0xff));
+  x16 = (x14 >> 8);
+  x17 = (uint8_t)(x16 & UINT8_C(0xff));
+  x18 = (uint8_t)(x16 >> 8); /* x16 is limb 0 shifted right by 48 bits, so this is its top byte and needs no mask */
+  x19 = (uint8_t)(x3 & UINT8_C(0xff)); /* limb 1 (x3), same pattern */
+  x20 = (x3 >> 8);
+  x21 = (uint8_t)(x20 & UINT8_C(0xff));
+  x22 = (x20 >> 8);
+  x23 = (uint8_t)(x22 & UINT8_C(0xff));
+  x24 = (x22 >> 8);
+  x25 = (uint8_t)(x24 & UINT8_C(0xff));
+  x26 = (x24 >> 8);
+  x27 = (uint8_t)(x26 & UINT8_C(0xff));
+  x28 = (x26 >> 8);
+  x29 = (uint8_t)(x28 & UINT8_C(0xff));
+  x30 = (x28 >> 8);
+  x31 = (uint8_t)(x30 & UINT8_C(0xff));
+  x32 = (uint8_t)(x30 >> 8);
+  x33 = (uint8_t)(x2 & UINT8_C(0xff)); /* limb 2 (x2), same pattern */
+  x34 = (x2 >> 8);
+  x35 = (uint8_t)(x34 & UINT8_C(0xff));
+  x36 = (x34 >> 8);
+  x37 = (uint8_t)(x36 & UINT8_C(0xff));
+  x38 = (x36 >> 8);
+  x39 = (uint8_t)(x38 & UINT8_C(0xff));
+  x40 = (x38 >> 8);
+  x41 = (uint8_t)(x40 & UINT8_C(0xff));
+  x42 = (x40 >> 8);
+  x43 = (uint8_t)(x42 & UINT8_C(0xff));
+  x44 = (x42 >> 8);
+  x45 = (uint8_t)(x44 & UINT8_C(0xff));
+  x46 = (uint8_t)(x44 >> 8);
+  x47 = (uint8_t)(x1 & UINT8_C(0xff)); /* limb 3 (x1), same pattern */
+  x48 = (x1 >> 8);
+  x49 = (uint8_t)(x48 & UINT8_C(0xff));
+  x50 = (x48 >> 8);
+  x51 = (uint8_t)(x50 & UINT8_C(0xff));
+  x52 = (x50 >> 8);
+  x53 = (uint8_t)(x52 & UINT8_C(0xff));
+  x54 = (x52 >> 8);
+  x55 = (uint8_t)(x54 & UINT8_C(0xff));
+  x56 = (x54 >> 8);
+  x57 = (uint8_t)(x56 & UINT8_C(0xff));
+  x58 = (x56 >> 8);
+  x59 = (uint8_t)(x58 & UINT8_C(0xff));
+  x60 = (uint8_t)(x58 >> 8);
+  out1[0] = x5; /* out1[0..7]: limb 0, least-significant byte first */
+  out1[1] = x7;
+  out1[2] = x9;
+  out1[3] = x11;
+  out1[4] = x13;
+  out1[5] = x15;
+  out1[6] = x17;
+  out1[7] = x18;
+  out1[8] = x19; /* out1[8..15]: limb 1 */
+  out1[9] = x21;
+  out1[10] = x23;
+  out1[11] = x25;
+  out1[12] = x27;
+  out1[13] = x29;
+  out1[14] = x31;
+  out1[15] = x32;
+  out1[16] = x33; /* out1[16..23]: limb 2 */
+  out1[17] = x35;
+  out1[18] = x37;
+  out1[19] = x39;
+  out1[20] = x41;
+  out1[21] = x43;
+  out1[22] = x45;
+  out1[23] = x46;
+  out1[24] = x47; /* out1[24..31]: limb 3 */
+  out1[25] = x49;
+  out1[26] = x51;
+  out1[27] = x53;
+  out1[28] = x55;
+  out1[29] = x57;
+  out1[30] = x59;
+  out1[31] = x60;
 }
 
 /*
- * The function fiat_p256_from_bytes deserializes a field element in the Montgomery domain from bytes in little-endian order.
+ * The function fiat_p256_from_bytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order.
+ *
  * Preconditions:
  *   0 ≤ bytes_eval arg1 < m
  * Postconditions:
@@ -1178,49 +1568,444 @@
  * Output Bounds:
  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
  */
-static void fiat_p256_from_bytes(uint64_t out1[4], const uint8_t arg1[32]) {
-  uint64_t x1 = ((uint64_t)(arg1[31]) << 56);
-  uint64_t x2 = ((uint64_t)(arg1[30]) << 48);
-  uint64_t x3 = ((uint64_t)(arg1[29]) << 40);
-  uint64_t x4 = ((uint64_t)(arg1[28]) << 32);
-  uint64_t x5 = ((uint64_t)(arg1[27]) << 24);
-  uint64_t x6 = ((uint64_t)(arg1[26]) << 16);
-  uint64_t x7 = ((uint64_t)(arg1[25]) << 8);
-  uint8_t x8 = (arg1[24]);
-  uint64_t x9 = ((uint64_t)(arg1[23]) << 56);
-  uint64_t x10 = ((uint64_t)(arg1[22]) << 48);
-  uint64_t x11 = ((uint64_t)(arg1[21]) << 40);
-  uint64_t x12 = ((uint64_t)(arg1[20]) << 32);
-  uint64_t x13 = ((uint64_t)(arg1[19]) << 24);
-  uint64_t x14 = ((uint64_t)(arg1[18]) << 16);
-  uint64_t x15 = ((uint64_t)(arg1[17]) << 8);
-  uint8_t x16 = (arg1[16]);
-  uint64_t x17 = ((uint64_t)(arg1[15]) << 56);
-  uint64_t x18 = ((uint64_t)(arg1[14]) << 48);
-  uint64_t x19 = ((uint64_t)(arg1[13]) << 40);
-  uint64_t x20 = ((uint64_t)(arg1[12]) << 32);
-  uint64_t x21 = ((uint64_t)(arg1[11]) << 24);
-  uint64_t x22 = ((uint64_t)(arg1[10]) << 16);
-  uint64_t x23 = ((uint64_t)(arg1[9]) << 8);
-  uint8_t x24 = (arg1[8]);
-  uint64_t x25 = ((uint64_t)(arg1[7]) << 56);
-  uint64_t x26 = ((uint64_t)(arg1[6]) << 48);
-  uint64_t x27 = ((uint64_t)(arg1[5]) << 40);
-  uint64_t x28 = ((uint64_t)(arg1[4]) << 32);
-  uint64_t x29 = ((uint64_t)(arg1[3]) << 24);
-  uint64_t x30 = ((uint64_t)(arg1[2]) << 16);
-  uint64_t x31 = ((uint64_t)(arg1[1]) << 8);
-  uint8_t x32 = (arg1[0]);
-  uint64_t x33 = (x32 + (x31 + (x30 + (x29 + (x28 + (x27 + (x26 + x25)))))));
-  uint64_t x34 = (x33 & UINT64_C(0xffffffffffffffff));
-  uint64_t x35 = (x8 + (x7 + (x6 + (x5 + (x4 + (x3 + (x2 + x1)))))));
-  uint64_t x36 = (x16 + (x15 + (x14 + (x13 + (x12 + (x11 + (x10 + x9)))))));
-  uint64_t x37 = (x24 + (x23 + (x22 + (x21 + (x20 + (x19 + (x18 + x17)))))));
-  uint64_t x38 = (x37 & UINT64_C(0xffffffffffffffff));
-  uint64_t x39 = (x36 & UINT64_C(0xffffffffffffffff));
-  out1[0] = x34;
-  out1[1] = x38;
-  out1[2] = x39;
-  out1[3] = x35;
+static FIAT_P256_FIAT_INLINE void fiat_p256_from_bytes(uint64_t out1[4], const uint8_t arg1[32]) {
+  uint64_t x1;
+  uint64_t x2;
+  uint64_t x3;
+  uint64_t x4;
+  uint64_t x5;
+  uint64_t x6;
+  uint64_t x7;
+  uint8_t x8;
+  uint64_t x9;
+  uint64_t x10;
+  uint64_t x11;
+  uint64_t x12;
+  uint64_t x13;
+  uint64_t x14;
+  uint64_t x15;
+  uint8_t x16;
+  uint64_t x17;
+  uint64_t x18;
+  uint64_t x19;
+  uint64_t x20;
+  uint64_t x21;
+  uint64_t x22;
+  uint64_t x23;
+  uint8_t x24;
+  uint64_t x25;
+  uint64_t x26;
+  uint64_t x27;
+  uint64_t x28;
+  uint64_t x29;
+  uint64_t x30;
+  uint64_t x31;
+  uint8_t x32;
+  uint64_t x33;
+  uint64_t x34;
+  uint64_t x35;
+  uint64_t x36;
+  uint64_t x37;
+  uint64_t x38;
+  uint64_t x39;
+  uint64_t x40;
+  uint64_t x41;
+  uint64_t x42;
+  uint64_t x43;
+  uint64_t x44;
+  uint64_t x45;
+  uint64_t x46;
+  uint64_t x47;
+  uint64_t x48;
+  uint64_t x49;
+  uint64_t x50;
+  uint64_t x51;
+  uint64_t x52;
+  uint64_t x53;
+  uint64_t x54;
+  uint64_t x55;
+  uint64_t x56;
+  uint64_t x57;
+  uint64_t x58;
+  uint64_t x59;
+  uint64_t x60;
+  x1 = ((uint64_t)(arg1[31]) << 56);
+  x2 = ((uint64_t)(arg1[30]) << 48);
+  x3 = ((uint64_t)(arg1[29]) << 40);
+  x4 = ((uint64_t)(arg1[28]) << 32);
+  x5 = ((uint64_t)(arg1[27]) << 24);
+  x6 = ((uint64_t)(arg1[26]) << 16);
+  x7 = ((uint64_t)(arg1[25]) << 8);
+  x8 = (arg1[24]);
+  x9 = ((uint64_t)(arg1[23]) << 56);
+  x10 = ((uint64_t)(arg1[22]) << 48);
+  x11 = ((uint64_t)(arg1[21]) << 40);
+  x12 = ((uint64_t)(arg1[20]) << 32);
+  x13 = ((uint64_t)(arg1[19]) << 24);
+  x14 = ((uint64_t)(arg1[18]) << 16);
+  x15 = ((uint64_t)(arg1[17]) << 8);
+  x16 = (arg1[16]);
+  x17 = ((uint64_t)(arg1[15]) << 56);
+  x18 = ((uint64_t)(arg1[14]) << 48);
+  x19 = ((uint64_t)(arg1[13]) << 40);
+  x20 = ((uint64_t)(arg1[12]) << 32);
+  x21 = ((uint64_t)(arg1[11]) << 24);
+  x22 = ((uint64_t)(arg1[10]) << 16);
+  x23 = ((uint64_t)(arg1[9]) << 8);
+  x24 = (arg1[8]);
+  x25 = ((uint64_t)(arg1[7]) << 56);
+  x26 = ((uint64_t)(arg1[6]) << 48);
+  x27 = ((uint64_t)(arg1[5]) << 40);
+  x28 = ((uint64_t)(arg1[4]) << 32);
+  x29 = ((uint64_t)(arg1[3]) << 24);
+  x30 = ((uint64_t)(arg1[2]) << 16);
+  x31 = ((uint64_t)(arg1[1]) << 8);
+  x32 = (arg1[0]);
+  x33 = (x31 + (uint64_t)x32);
+  x34 = (x30 + x33);
+  x35 = (x29 + x34);
+  x36 = (x28 + x35);
+  x37 = (x27 + x36);
+  x38 = (x26 + x37);
+  x39 = (x25 + x38);
+  x40 = (x23 + (uint64_t)x24);
+  x41 = (x22 + x40);
+  x42 = (x21 + x41);
+  x43 = (x20 + x42);
+  x44 = (x19 + x43);
+  x45 = (x18 + x44);
+  x46 = (x17 + x45);
+  x47 = (x15 + (uint64_t)x16);
+  x48 = (x14 + x47);
+  x49 = (x13 + x48);
+  x50 = (x12 + x49);
+  x51 = (x11 + x50);
+  x52 = (x10 + x51);
+  x53 = (x9 + x52);
+  x54 = (x7 + (uint64_t)x8);
+  x55 = (x6 + x54);
+  x56 = (x5 + x55);
+  x57 = (x4 + x56);
+  x58 = (x3 + x57);
+  x59 = (x2 + x58);
+  x60 = (x1 + x59);
+  out1[0] = x39;
+  out1[1] = x46;
+  out1[2] = x53;
+  out1[3] = x60;
 }
 
+/*
+ * The function fiat_p256_set_one returns the field element one in the Montgomery domain.
+ *
+ * Postconditions:
+ *   eval (from_montgomery out1) mod m = 1 mod m
+ *   0 ≤ eval out1 < m
+ *
+ */
+static FIAT_P256_FIAT_INLINE void fiat_p256_set_one(fiat_p256_montgomery_domain_field_element out1) {
+  out1[0] = 0x1;
+  out1[1] = UINT64_C(0xffffffff00000000);
+  out1[2] = UINT64_C(0xffffffffffffffff);
+  out1[3] = UINT32_C(0xfffffffe);
+}
+
+/*
+ * The function fiat_p256_msat returns the saturated representation of the prime modulus.
+ *
+ * Postconditions:
+ *   twos_complement_eval out1 = m
+ *   0 ≤ eval out1 < m
+ *
+ * Output Bounds:
+ *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ */
+static FIAT_P256_FIAT_INLINE void fiat_p256_msat(uint64_t out1[5]) {
+  out1[0] = UINT64_C(0xffffffffffffffff);
+  out1[1] = UINT32_C(0xffffffff);
+  out1[2] = 0x0;
+  out1[3] = UINT64_C(0xffffffff00000001);
+  out1[4] = 0x0;
+}
+
+/*
+ * The function fiat_p256_divstep computes a divstep.
+ *
+ * Preconditions:
+ *   0 ≤ eval arg4 < m
+ *   0 ≤ eval arg5 < m
+ * Postconditions:
+ *   out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1)
+ *   twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2)
+ *   twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋)
+ *   eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m)
+ *   eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg5) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m)
+ *   0 ≤ eval out4 < m
+ *   0 ≤ eval out5 < m
+ *   0 ≤ eval out2 < m
+ *   0 ≤ eval out3 < m
+ *
+ * Input Bounds:
+ *   arg1: [0x0 ~> 0xffffffffffffffff]
+ *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ *   arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ *   arg4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ *   arg5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * Output Bounds:
+ *   out1: [0x0 ~> 0xffffffffffffffff]
+ *   out2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ *   out3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ *   out4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ *   out5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ */
+static FIAT_P256_FIAT_INLINE void fiat_p256_divstep(uint64_t* out1, uint64_t out2[5], uint64_t out3[5], uint64_t out4[4], uint64_t out5[4], uint64_t arg1, const uint64_t arg2[5], const uint64_t arg3[5], const uint64_t arg4[4], const uint64_t arg5[4]) {
+  uint64_t x1;
+  fiat_p256_uint1 x2;
+  fiat_p256_uint1 x3;
+  uint64_t x4;
+  fiat_p256_uint1 x5;
+  uint64_t x6;
+  uint64_t x7;
+  uint64_t x8;
+  uint64_t x9;
+  uint64_t x10;
+  uint64_t x11;
+  uint64_t x12;
+  fiat_p256_uint1 x13;
+  uint64_t x14;
+  fiat_p256_uint1 x15;
+  uint64_t x16;
+  fiat_p256_uint1 x17;
+  uint64_t x18;
+  fiat_p256_uint1 x19;
+  uint64_t x20;
+  fiat_p256_uint1 x21;
+  uint64_t x22;
+  uint64_t x23;
+  uint64_t x24;
+  uint64_t x25;
+  uint64_t x26;
+  uint64_t x27;
+  uint64_t x28;
+  uint64_t x29;
+  uint64_t x30;
+  uint64_t x31;
+  fiat_p256_uint1 x32;
+  uint64_t x33;
+  fiat_p256_uint1 x34;
+  uint64_t x35;
+  fiat_p256_uint1 x36;
+  uint64_t x37;
+  fiat_p256_uint1 x38;
+  uint64_t x39;
+  fiat_p256_uint1 x40;
+  uint64_t x41;
+  fiat_p256_uint1 x42;
+  uint64_t x43;
+  fiat_p256_uint1 x44;
+  uint64_t x45;
+  fiat_p256_uint1 x46;
+  uint64_t x47;
+  fiat_p256_uint1 x48;
+  uint64_t x49;
+  uint64_t x50;
+  uint64_t x51;
+  uint64_t x52;
+  uint64_t x53;
+  fiat_p256_uint1 x54;
+  uint64_t x55;
+  fiat_p256_uint1 x56;
+  uint64_t x57;
+  fiat_p256_uint1 x58;
+  uint64_t x59;
+  fiat_p256_uint1 x60;
+  uint64_t x61;
+  uint64_t x62;
+  fiat_p256_uint1 x63;
+  uint64_t x64;
+  fiat_p256_uint1 x65;
+  uint64_t x66;
+  fiat_p256_uint1 x67;
+  uint64_t x68;
+  fiat_p256_uint1 x69;
+  uint64_t x70;
+  uint64_t x71;
+  uint64_t x72;
+  uint64_t x73;
+  fiat_p256_uint1 x74;
+  uint64_t x75;
+  uint64_t x76;
+  uint64_t x77;
+  uint64_t x78;
+  uint64_t x79;
+  uint64_t x80;
+  fiat_p256_uint1 x81;
+  uint64_t x82;
+  fiat_p256_uint1 x83;
+  uint64_t x84;
+  fiat_p256_uint1 x85;
+  uint64_t x86;
+  fiat_p256_uint1 x87;
+  uint64_t x88;
+  fiat_p256_uint1 x89;
+  uint64_t x90;
+  uint64_t x91;
+  uint64_t x92;
+  uint64_t x93;
+  uint64_t x94;
+  fiat_p256_uint1 x95;
+  uint64_t x96;
+  fiat_p256_uint1 x97;
+  uint64_t x98;
+  fiat_p256_uint1 x99;
+  uint64_t x100;
+  fiat_p256_uint1 x101;
+  uint64_t x102;
+  fiat_p256_uint1 x103;
+  uint64_t x104;
+  fiat_p256_uint1 x105;
+  uint64_t x106;
+  fiat_p256_uint1 x107;
+  uint64_t x108;
+  fiat_p256_uint1 x109;
+  uint64_t x110;
+  fiat_p256_uint1 x111;
+  uint64_t x112;
+  fiat_p256_uint1 x113;
+  uint64_t x114;
+  uint64_t x115;
+  uint64_t x116;
+  uint64_t x117;
+  uint64_t x118;
+  uint64_t x119;
+  uint64_t x120;
+  uint64_t x121;
+  uint64_t x122;
+  uint64_t x123;
+  uint64_t x124;
+  uint64_t x125;
+  uint64_t x126;
+  fiat_p256_addcarryx_u64(&x1, &x2, 0x0, (~arg1), 0x1);
+  x3 = (fiat_p256_uint1)((fiat_p256_uint1)(x1 >> 63) & (fiat_p256_uint1)((arg3[0]) & 0x1));
+  fiat_p256_addcarryx_u64(&x4, &x5, 0x0, (~arg1), 0x1);
+  fiat_p256_cmovznz_u64(&x6, x3, arg1, x4);
+  fiat_p256_cmovznz_u64(&x7, x3, (arg2[0]), (arg3[0]));
+  fiat_p256_cmovznz_u64(&x8, x3, (arg2[1]), (arg3[1]));
+  fiat_p256_cmovznz_u64(&x9, x3, (arg2[2]), (arg3[2]));
+  fiat_p256_cmovznz_u64(&x10, x3, (arg2[3]), (arg3[3]));
+  fiat_p256_cmovznz_u64(&x11, x3, (arg2[4]), (arg3[4]));
+  fiat_p256_addcarryx_u64(&x12, &x13, 0x0, 0x1, (~(arg2[0])));
+  fiat_p256_addcarryx_u64(&x14, &x15, x13, 0x0, (~(arg2[1])));
+  fiat_p256_addcarryx_u64(&x16, &x17, x15, 0x0, (~(arg2[2])));
+  fiat_p256_addcarryx_u64(&x18, &x19, x17, 0x0, (~(arg2[3])));
+  fiat_p256_addcarryx_u64(&x20, &x21, x19, 0x0, (~(arg2[4])));
+  fiat_p256_cmovznz_u64(&x22, x3, (arg3[0]), x12);
+  fiat_p256_cmovznz_u64(&x23, x3, (arg3[1]), x14);
+  fiat_p256_cmovznz_u64(&x24, x3, (arg3[2]), x16);
+  fiat_p256_cmovznz_u64(&x25, x3, (arg3[3]), x18);
+  fiat_p256_cmovznz_u64(&x26, x3, (arg3[4]), x20);
+  fiat_p256_cmovznz_u64(&x27, x3, (arg4[0]), (arg5[0]));
+  fiat_p256_cmovznz_u64(&x28, x3, (arg4[1]), (arg5[1]));
+  fiat_p256_cmovznz_u64(&x29, x3, (arg4[2]), (arg5[2]));
+  fiat_p256_cmovznz_u64(&x30, x3, (arg4[3]), (arg5[3]));
+  fiat_p256_addcarryx_u64(&x31, &x32, 0x0, x27, x27);
+  fiat_p256_addcarryx_u64(&x33, &x34, x32, x28, x28);
+  fiat_p256_addcarryx_u64(&x35, &x36, x34, x29, x29);
+  fiat_p256_addcarryx_u64(&x37, &x38, x36, x30, x30);
+  fiat_p256_subborrowx_u64(&x39, &x40, 0x0, x31, UINT64_C(0xffffffffffffffff));
+  fiat_p256_subborrowx_u64(&x41, &x42, x40, x33, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u64(&x43, &x44, x42, x35, 0x0);
+  fiat_p256_subborrowx_u64(&x45, &x46, x44, x37, UINT64_C(0xffffffff00000001));
+  fiat_p256_subborrowx_u64(&x47, &x48, x46, x38, 0x0);
+  x49 = (arg4[3]);
+  x50 = (arg4[2]);
+  x51 = (arg4[1]);
+  x52 = (arg4[0]);
+  fiat_p256_subborrowx_u64(&x53, &x54, 0x0, 0x0, x52);
+  fiat_p256_subborrowx_u64(&x55, &x56, x54, 0x0, x51);
+  fiat_p256_subborrowx_u64(&x57, &x58, x56, 0x0, x50);
+  fiat_p256_subborrowx_u64(&x59, &x60, x58, 0x0, x49);
+  fiat_p256_cmovznz_u64(&x61, x60, 0x0, UINT64_C(0xffffffffffffffff));
+  fiat_p256_addcarryx_u64(&x62, &x63, 0x0, x53, x61);
+  fiat_p256_addcarryx_u64(&x64, &x65, x63, x55, (x61 & UINT32_C(0xffffffff)));
+  fiat_p256_addcarryx_u64(&x66, &x67, x65, x57, 0x0);
+  fiat_p256_addcarryx_u64(&x68, &x69, x67, x59, (x61 & UINT64_C(0xffffffff00000001)));
+  fiat_p256_cmovznz_u64(&x70, x3, (arg5[0]), x62);
+  fiat_p256_cmovznz_u64(&x71, x3, (arg5[1]), x64);
+  fiat_p256_cmovznz_u64(&x72, x3, (arg5[2]), x66);
+  fiat_p256_cmovznz_u64(&x73, x3, (arg5[3]), x68);
+  x74 = (fiat_p256_uint1)(x22 & 0x1);
+  fiat_p256_cmovznz_u64(&x75, x74, 0x0, x7);
+  fiat_p256_cmovznz_u64(&x76, x74, 0x0, x8);
+  fiat_p256_cmovznz_u64(&x77, x74, 0x0, x9);
+  fiat_p256_cmovznz_u64(&x78, x74, 0x0, x10);
+  fiat_p256_cmovznz_u64(&x79, x74, 0x0, x11);
+  fiat_p256_addcarryx_u64(&x80, &x81, 0x0, x22, x75);
+  fiat_p256_addcarryx_u64(&x82, &x83, x81, x23, x76);
+  fiat_p256_addcarryx_u64(&x84, &x85, x83, x24, x77);
+  fiat_p256_addcarryx_u64(&x86, &x87, x85, x25, x78);
+  fiat_p256_addcarryx_u64(&x88, &x89, x87, x26, x79);
+  fiat_p256_cmovznz_u64(&x90, x74, 0x0, x27);
+  fiat_p256_cmovznz_u64(&x91, x74, 0x0, x28);
+  fiat_p256_cmovznz_u64(&x92, x74, 0x0, x29);
+  fiat_p256_cmovznz_u64(&x93, x74, 0x0, x30);
+  fiat_p256_addcarryx_u64(&x94, &x95, 0x0, x70, x90);
+  fiat_p256_addcarryx_u64(&x96, &x97, x95, x71, x91);
+  fiat_p256_addcarryx_u64(&x98, &x99, x97, x72, x92);
+  fiat_p256_addcarryx_u64(&x100, &x101, x99, x73, x93);
+  fiat_p256_subborrowx_u64(&x102, &x103, 0x0, x94, UINT64_C(0xffffffffffffffff));
+  fiat_p256_subborrowx_u64(&x104, &x105, x103, x96, UINT32_C(0xffffffff));
+  fiat_p256_subborrowx_u64(&x106, &x107, x105, x98, 0x0);
+  fiat_p256_subborrowx_u64(&x108, &x109, x107, x100, UINT64_C(0xffffffff00000001));
+  fiat_p256_subborrowx_u64(&x110, &x111, x109, x101, 0x0);
+  fiat_p256_addcarryx_u64(&x112, &x113, 0x0, x6, 0x1);
+  x114 = ((x80 >> 1) | ((x82 << 63) & UINT64_C(0xffffffffffffffff)));
+  x115 = ((x82 >> 1) | ((x84 << 63) & UINT64_C(0xffffffffffffffff)));
+  x116 = ((x84 >> 1) | ((x86 << 63) & UINT64_C(0xffffffffffffffff)));
+  x117 = ((x86 >> 1) | ((x88 << 63) & UINT64_C(0xffffffffffffffff)));
+  x118 = ((x88 & UINT64_C(0x8000000000000000)) | (x88 >> 1));
+  fiat_p256_cmovznz_u64(&x119, x48, x39, x31);
+  fiat_p256_cmovznz_u64(&x120, x48, x41, x33);
+  fiat_p256_cmovznz_u64(&x121, x48, x43, x35);
+  fiat_p256_cmovznz_u64(&x122, x48, x45, x37);
+  fiat_p256_cmovznz_u64(&x123, x111, x102, x94);
+  fiat_p256_cmovznz_u64(&x124, x111, x104, x96);
+  fiat_p256_cmovznz_u64(&x125, x111, x106, x98);
+  fiat_p256_cmovznz_u64(&x126, x111, x108, x100);
+  *out1 = x112;
+  out2[0] = x7;
+  out2[1] = x8;
+  out2[2] = x9;
+  out2[3] = x10;
+  out2[4] = x11;
+  out3[0] = x114;
+  out3[1] = x115;
+  out3[2] = x116;
+  out3[3] = x117;
+  out3[4] = x118;
+  out4[0] = x119;
+  out4[1] = x120;
+  out4[2] = x121;
+  out4[3] = x122;
+  out5[0] = x123;
+  out5[1] = x124;
+  out5[2] = x125;
+  out5[3] = x126;
+}
+
+/*
+ * The function fiat_p256_divstep_precomp returns the precomputed value for Bernstein-Yang-inversion (in montgomery form).
+ *
+ * Postconditions:
+ *   eval (from_montgomery out1) = ⌊(m - 1) / 2⌋^(if ⌊log2 m⌋ + 1 < 46 then ⌊(49 * (⌊log2 m⌋ + 1) + 80) / 17⌋ else ⌊(49 * (⌊log2 m⌋ + 1) + 57) / 17⌋)
+ *   0 ≤ eval out1 < m
+ *
+ * Output Bounds:
+ *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ */
+static FIAT_P256_FIAT_INLINE void fiat_p256_divstep_precomp(uint64_t out1[4]) {
+  out1[0] = UINT64_C(0x67ffffffb8000000);
+  out1[1] = UINT64_C(0xc000000038000000);
+  out1[2] = UINT64_C(0xd80000007fffffff);
+  out1[3] = UINT64_C(0x2fffffffffffffff);
+}
diff --git a/src/util/bot/DEPS b/src/util/bot/DEPS
index 574d94b..b9d42ef 100644
--- a/src/util/bot/DEPS
+++ b/src/util/bot/DEPS
@@ -19,7 +19,7 @@
   'checkout_sde': False,
   'checkout_nasm': False,
   'checkout_libcxx': False,
-  'vs_version': '2015',
+  'vs_version': '2017',
 
   # Run the following command to see the latest builds in CIPD:
   #  cipd describe PACKAGE_NAME -version latest
diff --git a/src/util/bot/vs_toolchain.py b/src/util/bot/vs_toolchain.py
index 051d78b..09f407d 100644
--- a/src/util/bot/vs_toolchain.py
+++ b/src/util/bot/vs_toolchain.py
@@ -62,14 +62,15 @@
 def _GetDesiredVsToolchainHashes(version):
   """Load a list of SHA1s corresponding to the toolchains that we want installed
   to build with."""
-  if version == '2015':
-    # Update 3 final with 10.0.15063.468 SDK and no vctip.exe.
-    return ['f53e4598951162bad6330f7a167486c7ae5db1e5']
   if version == '2017':
     # VS 2017 Update 9 (15.9.12) with 10.0.18362 SDK, 10.0.17763 version of
     # Debuggers, and 10.0.17134 version of d3dcompiler_47.dll, with ARM64
     # libraries.
     return ['418b3076791776573a815eb298c8aa590307af63']
+  if version == '2019':
+    # VS 2019 16.61 with 10.0.19041 SDK, and 10.0.20348 version of
+    # d3dcompiler_47.dll, with ARM64 libraries and UWP support.
+    return ['3bda71a11e']
   raise Exception('Unsupported VS version %s' % version)
 
 
diff --git a/src/util/fipstools/test_fips.c b/src/util/fipstools/test_fips.c
index b3d5521..e192b61 100644
--- a/src/util/fipstools/test_fips.c
+++ b/src/util/fipstools/test_fips.c
@@ -47,6 +47,13 @@
 int main(int argc, char **argv) {
   CRYPTO_library_init();
 
+  const uint32_t module_version = FIPS_version();
+  if (module_version == 0) {
+    printf("No module version set\n");
+    goto err;
+  }
+  printf("Module version: %" PRIu32 "\n", module_version);
+
   static const uint8_t kAESKey[16] = "BoringCrypto Key";
   static const uint8_t kPlaintext[64] =
       "BoringCryptoModule FIPS KAT Encryption and Decryption Plaintext!";
diff --git a/src/util/whitespace.txt b/src/util/whitespace.txt
index c311da3..08ccc0a 100644
--- a/src/util/whitespace.txt
+++ b/src/util/whitespace.txt
@@ -1 +1 @@
-This file is ignored.  It exists to make no-op commits to trigger new builds.
+This file is ignored. It exists to make no-op commits to trigger new builds.