Snap for 7821744 from 45b2647cce025e868a5352d42dac584aa74903b4 to sdk-release

Change-Id: I1bdadc1a5cde74487beae39a6210c388d773d499
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 04ce902..2c927ab 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
 {
   "git": {
-    "sha1": "d1e4498cbc02d22b3ddded7216c38d94e494cf90"
+    "sha1": "8ee0b9ac69a43a44cbc648e3524e594e7db54eb3"
   }
 }
diff --git a/Android.bp b/Android.bp
index 6e46e32..1d29c81 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,8 +1,6 @@
 // This file is generated by cargo2android.py --config cargo2android.json.
 // Do not modify this file as changes will be overridden on upgrade.
 
-
-
 package {
     default_applicable_licenses: ["external_rust_crates_num-bigint_license"],
 }
@@ -57,6 +55,8 @@
     name: "libnum_bigint",
     host_supported: true,
     crate_name: "num_bigint",
+    cargo_env_compat: true,
+    cargo_pkg_version: "0.4.2",
     srcs: [
         "src/lib.rs",
         ":copy_num-bigint_build_out",
@@ -83,6 +83,8 @@
         "src/lib.rs",
         ":copy_num-bigint_build_out",
     ],
+    cargo_env_compat: true,
+    cargo_pkg_version: "0.4.2",
     test_suites: ["general-tests"],
     auto_gen_config: true,
     edition: "2018",
@@ -116,6 +118,8 @@
 rust_defaults {
     name: "num-bigint_test_defaults_num_bigint",
     crate_name: "num_bigint",
+    cargo_env_compat: true,
+    cargo_pkg_version: "0.4.2",
     test_suites: ["general-tests"],
     auto_gen_config: true,
     edition: "2018",
diff --git a/Cargo.toml b/Cargo.toml
index 9c2fdea..54ccc1d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@
 [package]
 edition = "2018"
 name = "num-bigint"
-version = "0.4.0"
+version = "0.4.2"
 authors = ["The Rust Project Developers"]
 build = "build.rs"
 exclude = ["/bors.toml", "/ci/*", "/.github/*"]
diff --git a/METADATA b/METADATA
index 4020657..8ce088c 100644
--- a/METADATA
+++ b/METADATA
@@ -7,14 +7,13 @@
   }
   url {
     type: ARCHIVE
-    value: "https://static.crates.io/crates/num-bigint/num-bigint-0.4.0.crate"
+    value: "https://static.crates.io/crates/num-bigint/num-bigint-0.4.2.crate"
   }
-  version: "0.4.0"
-  # Dual-licensed, using the least restrictive per go/thirdpartylicenses#same.
+  version: "0.4.2"
   license_type: NOTICE
   last_upgrade_date {
     year: 2021
-    month: 7
-    day: 9
+    month: 9
+    day: 22
   }
 }
diff --git a/RELEASES.md b/RELEASES.md
index fd07b1e..b06f4fa 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -1,3 +1,25 @@
+# Release 0.4.2 (2021-09-03)
+
+- [Use explicit `Integer::div_ceil` to avoid the new unstable method.][219]
+
+**Contributors**: @catenacyber, @cuviper
+
+[219]: https://github.com/rust-num/num-bigint/pull/219
+
+# Release 0.4.1 (2021-08-27)
+
+- [Fixed scalar divide-by-zero panics.][200]
+- [Implemented `DoubleEndedIterator` for `U32Digits` and `U64Digits`.][208]
+- [Optimized multiplication to avoid unnecessary allocations.][199]
+- [Optimized string formatting for very large values.][216]
+
+**Contributors**: @cuviper, @PatrickNorton
+
+[199]: https://github.com/rust-num/num-bigint/pull/199
+[200]: https://github.com/rust-num/num-bigint/pull/200
+[208]: https://github.com/rust-num/num-bigint/pull/208
+[216]: https://github.com/rust-num/num-bigint/pull/216
+
 # Release 0.4.0 (2021-03-05)
 
 ### Breaking Changes
diff --git a/benches/bigint.rs b/benches/bigint.rs
index b7f5fd2..80ec191 100644
--- a/benches/bigint.rs
+++ b/benches/bigint.rs
@@ -39,7 +39,7 @@
     let mut f: BigUint = One::one();
     for i in 1..=n {
         let bu: BigUint = FromPrimitive::from_usize(i).unwrap();
-        f += bu;
+        f *= bu;
     }
     f
 }
@@ -174,35 +174,40 @@
     b.iter(|| fib.to_string());
 }
 
-fn to_str_radix_bench(b: &mut Bencher, radix: u32) {
+fn to_str_radix_bench(b: &mut Bencher, radix: u32, bits: u64) {
     let mut rng = get_rng();
-    let x = rng.gen_bigint(1009);
+    let x = rng.gen_bigint(bits);
     b.iter(|| x.to_str_radix(radix));
 }
 
 #[bench]
 fn to_str_radix_02(b: &mut Bencher) {
-    to_str_radix_bench(b, 2);
+    to_str_radix_bench(b, 2, 1009);
 }
 
 #[bench]
 fn to_str_radix_08(b: &mut Bencher) {
-    to_str_radix_bench(b, 8);
+    to_str_radix_bench(b, 8, 1009);
 }
 
 #[bench]
 fn to_str_radix_10(b: &mut Bencher) {
-    to_str_radix_bench(b, 10);
+    to_str_radix_bench(b, 10, 1009);
+}
+
+#[bench]
+fn to_str_radix_10_2(b: &mut Bencher) {
+    to_str_radix_bench(b, 10, 10009);
 }
 
 #[bench]
 fn to_str_radix_16(b: &mut Bencher) {
-    to_str_radix_bench(b, 16);
+    to_str_radix_bench(b, 16, 1009);
 }
 
 #[bench]
 fn to_str_radix_36(b: &mut Bencher) {
-    to_str_radix_bench(b, 36);
+    to_str_radix_bench(b, 36, 1009);
 }
 
 fn from_str_radix_bench(b: &mut Bencher, radix: u32) {
@@ -351,6 +356,21 @@
     });
 }
 
+#[bench]
+fn pow_bench_1e1000(b: &mut Bencher) {
+    b.iter(|| BigUint::from(10u32).pow(1_000));
+}
+
+#[bench]
+fn pow_bench_1e10000(b: &mut Bencher) {
+    b.iter(|| BigUint::from(10u32).pow(10_000));
+}
+
+#[bench]
+fn pow_bench_1e100000(b: &mut Bencher) {
+    b.iter(|| BigUint::from(10u32).pow(100_000));
+}
+
 /// This modulus is the prime from the 2048-bit MODP DH group:
 /// https://tools.ietf.org/html/rfc3526#section-3
 const RFC3526_2048BIT_MODP_GROUP: &str = "\
diff --git a/out/probe0.ll b/out/probe0.ll
index 31681b1..296297c 100644
--- a/out/probe0.ll
+++ b/out/probe0.ll
@@ -1,5 +1,5 @@
-; ModuleID = 'probe0.3a1fbbbh-cgu.0'
-source_filename = "probe0.3a1fbbbh-cgu.0"
+; ModuleID = 'probe0.3041c4be-cgu.0'
+source_filename = "probe0.3041c4be-cgu.0"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/out/probe1.ll b/out/probe1.ll
index 5715d6b..dd4e8ea 100644
--- a/out/probe1.ll
+++ b/out/probe1.ll
@@ -1,5 +1,5 @@
-; ModuleID = 'probe1.3a1fbbbh-cgu.0'
-source_filename = "probe1.3a1fbbbh-cgu.0"
+; ModuleID = 'probe1.56ebf6f4-cgu.0'
+source_filename = "probe1.56ebf6f4-cgu.0"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/out/probe2.ll b/out/probe2.ll
index be90c33..a93303e 100644
--- a/out/probe2.ll
+++ b/out/probe2.ll
@@ -1,5 +1,5 @@
-; ModuleID = 'probe2.3a1fbbbh-cgu.0'
-source_filename = "probe2.3a1fbbbh-cgu.0"
+; ModuleID = 'probe2.a8400cd7-cgu.0'
+source_filename = "probe2.a8400cd7-cgu.0"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/out/probe3.ll b/out/probe3.ll
index 0c07f3c..c0660f8 100644
--- a/out/probe3.ll
+++ b/out/probe3.ll
@@ -1,5 +1,5 @@
-; ModuleID = 'probe3.3a1fbbbh-cgu.0'
-source_filename = "probe3.3a1fbbbh-cgu.0"
+; ModuleID = 'probe3.474510b1-cgu.0'
+source_filename = "probe3.474510b1-cgu.0"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/patches/disable_tests.patch b/patches/disable_tests.patch
new file mode 100644
index 0000000..c08b7d9
--- /dev/null
+++ b/patches/disable_tests.patch
@@ -0,0 +1,24 @@
+diff --git a/tests/bigint_scalar.rs b/tests/bigint_scalar.rs
+index 2a19faf..a4348f4 100644
+--- a/tests/bigint_scalar.rs
++++ b/tests/bigint_scalar.rs
+@@ -149,6 +149,7 @@ fn test_scalar_div_rem() {
+ }
+ 
+ #[test]
++#[ignore = "Android sometimes uses panic_abort"]
+ fn test_scalar_div_rem_zero() {
+     catch_unwind(|| BigInt::zero() / 0u32).unwrap_err();
+     catch_unwind(|| BigInt::zero() % 0u32).unwrap_err();
+diff --git a/tests/biguint_scalar.rs b/tests/biguint_scalar.rs
+index 7c34f7e..5b9f3ea 100644
+--- a/tests/biguint_scalar.rs
++++ b/tests/biguint_scalar.rs
+@@ -115,6 +115,7 @@ fn test_scalar_div_rem() {
+ }
+ 
+ #[test]
++#[ignore = "Android sometimes uses panic_abort"]
+ fn test_scalar_div_rem_zero() {
+     catch_unwind(|| BigUint::zero() / 0u32).unwrap_err();
+     catch_unwind(|| BigUint::zero() % 0u32).unwrap_err();
diff --git a/src/bigint/multiplication.rs b/src/bigint/multiplication.rs
index aaf5b14..a2d9708 100644
--- a/src/bigint/multiplication.rs
+++ b/src/bigint/multiplication.rs
@@ -21,24 +21,49 @@
     }
 }
 
-forward_all_binop_to_ref_ref!(impl Mul for BigInt, mul);
+macro_rules! impl_mul {
+    ($(impl<$($a:lifetime),*> Mul<$Other:ty> for $Self:ty;)*) => {$(
+        impl<$($a),*> Mul<$Other> for $Self {
+            type Output = BigInt;
 
-impl<'a, 'b> Mul<&'b BigInt> for &'a BigInt {
-    type Output = BigInt;
-
-    #[inline]
-    fn mul(self, other: &BigInt) -> BigInt {
-        BigInt::from_biguint(self.sign * other.sign, &self.data * &other.data)
-    }
+            #[inline]
+            fn mul(self, other: $Other) -> BigInt {
+                // automatically match value/ref
+                let BigInt { data: x, .. } = self;
+                let BigInt { data: y, .. } = other;
+                BigInt::from_biguint(self.sign * other.sign, x * y)
+            }
+        }
+    )*}
+}
+impl_mul! {
+    impl<> Mul<BigInt> for BigInt;
+    impl<'b> Mul<&'b BigInt> for BigInt;
+    impl<'a> Mul<BigInt> for &'a BigInt;
+    impl<'a, 'b> Mul<&'b BigInt> for &'a BigInt;
 }
 
-impl<'a> MulAssign<&'a BigInt> for BigInt {
-    #[inline]
-    fn mul_assign(&mut self, other: &BigInt) {
-        *self = &*self * other;
-    }
+macro_rules! impl_mul_assign {
+    ($(impl<$($a:lifetime),*> MulAssign<$Other:ty> for BigInt;)*) => {$(
+        impl<$($a),*> MulAssign<$Other> for BigInt {
+            #[inline]
+            fn mul_assign(&mut self, other: $Other) {
+                // automatically match value/ref
+                let BigInt { data: y, .. } = other;
+                self.data *= y;
+                if self.data.is_zero() {
+                    self.sign = NoSign;
+                } else {
+                    self.sign = self.sign * other.sign;
+                }
+            }
+        }
+    )*}
 }
-forward_val_assign!(impl MulAssign for BigInt, mul_assign);
+impl_mul_assign! {
+    impl<> MulAssign<BigInt> for BigInt;
+    impl<'a> MulAssign<&'a BigInt> for BigInt;
+}
 
 promote_all_scalars!(impl Mul for BigInt, mul);
 promote_all_scalars_assign!(impl MulAssign for BigInt, mul_assign);
diff --git a/src/bigrand.rs b/src/bigrand.rs
index cb44032..8f0ce5b 100644
--- a/src/bigrand.rs
+++ b/src/bigrand.rs
@@ -66,7 +66,7 @@
         let len = (digits + (rem > 0) as u64)
             .to_usize()
             .expect("capacity overflow");
-        let native_digits = bit_size.div_ceil(&64);
+        let native_digits = Integer::div_ceil(&bit_size, &64);
         let native_len = native_digits.to_usize().expect("capacity overflow");
         let mut data = vec![0u64; native_len];
         unsafe {
diff --git a/src/biguint.rs b/src/biguint.rs
index 4235071..623823c 100644
--- a/src/biguint.rs
+++ b/src/biguint.rs
@@ -395,7 +395,7 @@
                 // Try to guess by scaling down such that it does fit in `f64`.
                 // With some (x * 2ⁿᵏ), its nth root ≈ (ⁿ√x * 2ᵏ)
                 let extra_bits = bits - (core::f64::MAX_EXP as u64 - 1);
-                let root_scale = extra_bits.div_ceil(&n64);
+                let root_scale = Integer::div_ceil(&extra_bits, &n64);
                 let scale = root_scale * n64;
                 if scale < bits && bits - scale > n64 {
                     (self >> scale).nth_root(n) << root_scale
@@ -846,8 +846,9 @@
     /// be nonzero.
     #[inline]
     fn normalize(&mut self) {
-        while let Some(&0) = self.data.last() {
-            self.data.pop();
+        if let Some(&0) = self.data.last() {
+            let len = self.data.iter().rposition(|&d| d != 0).map_or(0, |i| i + 1);
+            self.data.truncate(len);
         }
         if self.data.len() < self.data.capacity() / 4 {
             self.data.shrink_to_fit();
diff --git a/src/biguint/convert.rs b/src/biguint/convert.rs
index 278ec78..5cf05cb 100644
--- a/src/biguint/convert.rs
+++ b/src/biguint/convert.rs
@@ -15,7 +15,7 @@
 use core::convert::TryFrom;
 use core::mem;
 use core::str::FromStr;
-use num_integer::Integer;
+use num_integer::{Integer, Roots};
 use num_traits::float::FloatCore;
 use num_traits::{FromPrimitive, Num, PrimInt, ToPrimitive, Zero};
 
@@ -65,9 +65,8 @@
     debug_assert!(!v.is_empty() && bits <= 8 && big_digit::BITS % bits != 0);
     debug_assert!(v.iter().all(|&c| BigDigit::from(c) < (1 << bits)));
 
-    let big_digits = (v.len() as u64)
-        .saturating_mul(bits.into())
-        .div_ceil(&big_digit::BITS.into())
+    let total_bits = (v.len() as u64).saturating_mul(bits.into());
+    let big_digits = Integer::div_ceil(&total_bits, &big_digit::BITS.into())
         .to_usize()
         .unwrap_or(core::usize::MAX);
     let mut data = Vec::with_capacity(big_digits);
@@ -580,9 +579,7 @@
     let last_i = u.data.len() - 1;
     let mask: BigDigit = (1 << bits) - 1;
     let digits_per_big_digit = big_digit::BITS / bits;
-    let digits = u
-        .bits()
-        .div_ceil(&u64::from(bits))
+    let digits = Integer::div_ceil(&u.bits(), &u64::from(bits))
         .to_usize()
         .unwrap_or(core::usize::MAX);
     let mut res = Vec::with_capacity(digits);
@@ -608,9 +605,7 @@
     debug_assert!(!u.is_zero() && bits <= 8 && big_digit::BITS % bits != 0);
 
     let mask: BigDigit = (1 << bits) - 1;
-    let digits = u
-        .bits()
-        .div_ceil(&u64::from(bits))
+    let digits = Integer::div_ceil(&u.bits(), &u64::from(bits))
         .to_usize()
         .unwrap_or(core::usize::MAX);
     let mut res = Vec::with_capacity(digits);
@@ -665,6 +660,39 @@
     let (base, power) = get_radix_base(radix, big_digit::HALF_BITS);
     let radix = radix as BigDigit;
 
+    // For very large numbers, the O(n²) loop of repeated `div_rem_digit` dominates the
+    // performance. We can mitigate this by dividing into chunks of a larger base first.
+    // The threshold for this was chosen by anecdotal performance measurements to
+    // approximate where this starts to make a noticeable difference.
+    if digits.data.len() >= 64 {
+        let mut big_base = BigUint::from(base * base);
+        let mut big_power = 2usize;
+
+        // Choose a target base length near √n.
+        let target_len = digits.data.len().sqrt();
+        while big_base.data.len() < target_len {
+            big_base = &big_base * &big_base;
+            big_power *= 2;
+        }
+
+        // This outer loop will run approximately √n times.
+        while digits > big_base {
+            // This is still the dominating factor, with n digits divided by √n digits.
+            let (q, mut big_r) = digits.div_rem(&big_base);
+            digits = q;
+
+            // This inner loop now has O(√n²)=O(n) behavior altogether.
+            for _ in 0..big_power {
+                let (q, mut r) = div_rem_digit(big_r, base);
+                big_r = q;
+                for _ in 0..power {
+                    res.push((r % radix) as u8);
+                    r /= radix;
+                }
+            }
+        }
+    }
+
     while digits.data.len() > 1 {
         let (q, mut r) = div_rem_digit(digits, base);
         for _ in 0..power {
diff --git a/src/biguint/division.rs b/src/biguint/division.rs
index 030b185..b5d4259 100644
--- a/src/biguint/division.rs
+++ b/src/biguint/division.rs
@@ -1,7 +1,7 @@
 use super::addition::__add2;
 #[cfg(not(u64_digit))]
 use super::u32_to_u128;
-use super::BigUint;
+use super::{cmp_slice, BigUint};
 
 use crate::big_digit::{self, BigDigit, DoubleBigDigit};
 use crate::UsizePromotion;
@@ -41,6 +41,10 @@
 
 #[inline]
 pub(super) fn div_rem_digit(mut a: BigUint, b: BigDigit) -> (BigUint, BigDigit) {
+    if b == 0 {
+        panic!("attempt to divide by zero")
+    }
+
     let mut rem = 0;
 
     if b <= big_digit::HALF {
@@ -62,6 +66,10 @@
 
 #[inline]
 fn rem_digit(a: &BigUint, b: BigDigit) -> BigDigit {
+    if b == 0 {
+        panic!("attempt to divide by zero")
+    }
+
     let mut rem = 0;
 
     if b <= big_digit::HALF {
@@ -145,14 +153,14 @@
     //
     let shift = d.data.last().unwrap().leading_zeros() as usize;
 
-    let (q, r) = if shift == 0 {
+    if shift == 0 {
         // no need to clone d
-        div_rem_core(u, &d)
+        div_rem_core(u, &d.data)
     } else {
-        div_rem_core(u << shift, &(d << shift))
-    };
-    // renormalize the remainder
-    (q, r >> shift)
+        let (q, r) = div_rem_core(u << shift, &(d << shift).data);
+        // renormalize the remainder
+        (q, r >> shift)
+    }
 }
 
 pub(super) fn div_rem_ref(u: &BigUint, d: &BigUint) -> (BigUint, BigUint) {
@@ -187,24 +195,21 @@
     //
     let shift = d.data.last().unwrap().leading_zeros() as usize;
 
-    let (q, r) = if shift == 0 {
+    if shift == 0 {
         // no need to clone d
-        div_rem_core(u.clone(), d)
+        div_rem_core(u.clone(), &d.data)
     } else {
-        div_rem_core(u << shift, &(d << shift))
-    };
-    // renormalize the remainder
-    (q, r >> shift)
+        let (q, r) = div_rem_core(u << shift, &(d << shift).data);
+        // renormalize the remainder
+        (q, r >> shift)
+    }
 }
 
 /// An implementation of the base division algorithm.
 /// Knuth, TAOCP vol 2 section 4.3.1, algorithm D, with an improvement from exercises 19-21.
-fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) {
-    debug_assert!(
-        a.data.len() >= b.data.len()
-            && b.data.len() > 1
-            && b.data.last().unwrap().leading_zeros() == 0
-    );
+fn div_rem_core(mut a: BigUint, b: &[BigDigit]) -> (BigUint, BigUint) {
+    debug_assert!(a.data.len() >= b.len() && b.len() > 1);
+    debug_assert!(b.last().unwrap().leading_zeros() == 0);
 
     // The algorithm works by incrementally calculating "guesses", q0, for the next digit of the
     // quotient. Once we have any number q0 such that (q0 << j) * b <= a, we can set
@@ -227,16 +232,16 @@
     let mut a0 = 0;
 
     // [b1, b0] are the two most significant digits of the divisor. They never change.
-    let b0 = *b.data.last().unwrap();
-    let b1 = b.data[b.data.len() - 2];
+    let b0 = *b.last().unwrap();
+    let b1 = b[b.len() - 2];
 
-    let q_len = a.data.len() - b.data.len() + 1;
+    let q_len = a.data.len() - b.len() + 1;
     let mut q = BigUint {
         data: vec![0; q_len],
     };
 
     for j in (0..q_len).rev() {
-        debug_assert!(a.data.len() == b.data.len() + j);
+        debug_assert!(a.data.len() == b.len() + j);
 
         let a1 = *a.data.last().unwrap();
         let a2 = a.data[a.data.len() - 2];
@@ -272,11 +277,11 @@
         // q0 is now either the correct quotient digit, or in rare cases 1 too large.
         // Subtract (q0 << j) from a. This may overflow, in which case we will have to correct.
 
-        let mut borrow = sub_mul_digit_same_len(&mut a.data[j..], &b.data, q0);
+        let mut borrow = sub_mul_digit_same_len(&mut a.data[j..], b, q0);
         if borrow > a0 {
             // q0 is too large. We need to add back one multiple of b.
             q0 -= 1;
-            borrow -= __add2(&mut a.data[j..], &b.data);
+            borrow -= __add2(&mut a.data[j..], b);
         }
         // The top digit of a, stored in a0, has now been zeroed.
         debug_assert!(borrow == a0);
@@ -290,7 +295,7 @@
     a.data.push(a0);
     a.normalize();
 
-    debug_assert!(a < *b);
+    debug_assert_eq!(cmp_slice(&a.data, b), Less);
 
     (q.normalized(), a)
 }
diff --git a/src/biguint/iter.rs b/src/biguint/iter.rs
index 5b9ceff..1e673e4 100644
--- a/src/biguint/iter.rs
+++ b/src/biguint/iter.rs
@@ -85,6 +85,30 @@
 }
 
 #[cfg(u64_digit)]
+impl DoubleEndedIterator for U32Digits<'_> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        match self.data.split_last() {
+            Some((&last, data)) => {
+                let last_is_lo = self.last_hi_is_zero;
+                self.last_hi_is_zero = !last_is_lo;
+                if last_is_lo {
+                    self.data = data;
+                    if data.is_empty() && !self.next_is_lo {
+                        self.next_is_lo = true;
+                        None
+                    } else {
+                        Some(last as u32)
+                    }
+                } else {
+                    Some((last >> 32) as u32)
+                }
+            }
+            None => None,
+        }
+    }
+}
+
+#[cfg(u64_digit)]
 impl ExactSizeIterator for U32Digits<'_> {
     #[inline]
     fn len(&self) -> usize {
@@ -130,6 +154,13 @@
 }
 
 #[cfg(not(u64_digit))]
+impl DoubleEndedIterator for U32Digits<'_> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.it.next_back().copied()
+    }
+}
+
+#[cfg(not(u64_digit))]
 impl ExactSizeIterator for U32Digits<'_> {
     #[inline]
     fn len(&self) -> usize {
@@ -182,6 +213,13 @@
     }
 }
 
+#[cfg(not(u64_digit))]
+impl DoubleEndedIterator for U64Digits<'_> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.it.next_back().map(u32_chunk_to_u64)
+    }
+}
+
 #[cfg(u64_digit)]
 impl<'a> U64Digits<'a> {
     #[inline]
@@ -219,6 +257,13 @@
     }
 }
 
+#[cfg(u64_digit)]
+impl DoubleEndedIterator for U64Digits<'_> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.it.next_back().cloned()
+    }
+}
+
 impl ExactSizeIterator for U64Digits<'_> {
     #[inline]
     fn len(&self) -> usize {
@@ -269,3 +314,45 @@
     assert_eq!(it.len(), 0);
     assert_eq!(it.next(), None);
 }
+
+#[test]
+fn test_iter_u32_digits_be() {
+    let n = super::BigUint::from(5u8);
+    let mut it = n.iter_u32_digits();
+    assert_eq!(it.len(), 1);
+    assert_eq!(it.next(), Some(5));
+    assert_eq!(it.len(), 0);
+    assert_eq!(it.next(), None);
+    assert_eq!(it.len(), 0);
+    assert_eq!(it.next(), None);
+
+    let n = super::BigUint::from(112500000000u64);
+    let mut it = n.iter_u32_digits();
+    assert_eq!(it.len(), 2);
+    assert_eq!(it.next(), Some(830850304));
+    assert_eq!(it.len(), 1);
+    assert_eq!(it.next(), Some(26));
+    assert_eq!(it.len(), 0);
+    assert_eq!(it.next(), None);
+}
+
+#[test]
+fn test_iter_u64_digits_be() {
+    let n = super::BigUint::from(5u8);
+    let mut it = n.iter_u64_digits();
+    assert_eq!(it.len(), 1);
+    assert_eq!(it.next_back(), Some(5));
+    assert_eq!(it.len(), 0);
+    assert_eq!(it.next(), None);
+    assert_eq!(it.len(), 0);
+    assert_eq!(it.next(), None);
+
+    let n = super::BigUint::from(18_446_744_073_709_551_616u128);
+    let mut it = n.iter_u64_digits();
+    assert_eq!(it.len(), 2);
+    assert_eq!(it.next_back(), Some(1));
+    assert_eq!(it.len(), 1);
+    assert_eq!(it.next_back(), Some(0));
+    assert_eq!(it.len(), 0);
+    assert_eq!(it.next(), None);
+}
diff --git a/src/biguint/multiplication.rs b/src/biguint/multiplication.rs
index aaa6934..581c9e1 100644
--- a/src/biguint/multiplication.rs
+++ b/src/biguint/multiplication.rs
@@ -2,7 +2,7 @@
 use super::subtraction::sub2;
 #[cfg(not(u64_digit))]
 use super::u32_from_u128;
-use super::{biguint_from_vec, cmp_slice, BigUint};
+use super::{biguint_from_vec, cmp_slice, BigUint, IntDigits};
 
 use crate::big_digit::{self, BigDigit, DoubleBigDigit};
 use crate::Sign::{self, Minus, NoSign, Plus};
@@ -11,7 +11,7 @@
 use core::cmp::Ordering;
 use core::iter::Product;
 use core::ops::{Mul, MulAssign};
-use num_traits::{CheckedMul, One, Zero};
+use num_traits::{CheckedMul, FromPrimitive, One, Zero};
 
 #[inline]
 pub(super) fn mac_with_carry(
@@ -66,7 +66,20 @@
 /// Three argument multiply accumulate:
 /// acc += b * c
 #[allow(clippy::many_single_char_names)]
-fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) {
+fn mac3(mut acc: &mut [BigDigit], mut b: &[BigDigit], mut c: &[BigDigit]) {
+    // Least-significant zeros have no effect on the output.
+    if let Some(&0) = b.first() {
+        let nz = b.iter().position(|&d| d != 0).unwrap();
+        b = &b[nz..];
+        acc = &mut acc[nz..];
+    }
+    if let Some(&0) = c.first() {
+        let nz = c.iter().position(|&d| d != 0).unwrap();
+        c = &c[nz..];
+        acc = &mut acc[nz..];
+    }
+
+    let acc = acc;
     let (x, y) = if b.len() < c.len() { (b, c) } else { (c, b) };
 
     // We use three algorithms for different input sizes.
@@ -155,28 +168,28 @@
 
         // We reuse the same BigUint for all the intermediate multiplies and have to size p
         // appropriately here: x1.len() >= x0.len and y1.len() >= y0.len():
-        let len = x1.len() + y1.len() + 1;
+        let len = x1.len() + y1.len();
         let mut p = BigUint { data: vec![0; len] };
 
         // p2 = x1 * y1
-        mac3(&mut p.data[..], x1, y1);
+        mac3(&mut p.data, x1, y1);
 
         // Not required, but the adds go faster if we drop any unneeded 0s from the end:
         p.normalize();
 
-        add2(&mut acc[b..], &p.data[..]);
-        add2(&mut acc[b * 2..], &p.data[..]);
+        add2(&mut acc[b..], &p.data);
+        add2(&mut acc[b * 2..], &p.data);
 
         // Zero out p before the next multiply:
         p.data.truncate(0);
         p.data.resize(len, 0);
 
         // p0 = x0 * y0
-        mac3(&mut p.data[..], x0, y0);
+        mac3(&mut p.data, x0, y0);
         p.normalize();
 
-        add2(&mut acc[..], &p.data[..]);
-        add2(&mut acc[b..], &p.data[..]);
+        add2(acc, &p.data);
+        add2(&mut acc[b..], &p.data);
 
         // p1 = (x1 - x0) * (y1 - y0)
         // We do this one last, since it may be negative and acc can't ever be negative:
@@ -188,13 +201,13 @@
                 p.data.truncate(0);
                 p.data.resize(len, 0);
 
-                mac3(&mut p.data[..], &j0.data[..], &j1.data[..]);
+                mac3(&mut p.data, &j0.data, &j1.data);
                 p.normalize();
 
-                sub2(&mut acc[b..], &p.data[..]);
+                sub2(&mut acc[b..], &p.data);
             }
             Minus => {
-                mac3(&mut acc[b..], &j0.data[..], &j1.data[..]);
+                mac3(&mut acc[b..], &j0.data, &j1.data);
             }
             NoSign => (),
         }
@@ -299,47 +312,73 @@
         //
         // This particular sequence is given by Bodrato and is an interpolation
         // of the above equations.
-        let mut comp3: BigInt = (r3 - &r1) / 3;
-        let mut comp1: BigInt = (r1 - &r2) / 2;
+        let mut comp3: BigInt = (r3 - &r1) / 3u32;
+        let mut comp1: BigInt = (r1 - &r2) >> 1;
         let mut comp2: BigInt = r2 - &r0;
-        comp3 = (&comp2 - comp3) / 2 + &r4 * 2;
+        comp3 = ((&comp2 - comp3) >> 1) + (&r4 << 1);
         comp2 += &comp1 - &r4;
         comp1 -= &comp3;
 
         // Recomposition. The coefficients of the polynomial are now known.
         //
         // Evaluate at w(t) where t is our given base to get the result.
-        let bits = u64::from(big_digit::BITS) * i as u64;
-        let result = r0
-            + (comp1 << bits)
-            + (comp2 << (2 * bits))
-            + (comp3 << (3 * bits))
-            + (r4 << (4 * bits));
-        let result_pos = result.to_biguint().unwrap();
-        add2(&mut acc[..], &result_pos.data);
+        //
+        //     let bits = u64::from(big_digit::BITS) * i as u64;
+        //     let result = r0
+        //         + (comp1 << bits)
+        //         + (comp2 << (2 * bits))
+        //         + (comp3 << (3 * bits))
+        //         + (r4 << (4 * bits));
+        //     let result_pos = result.to_biguint().unwrap();
+        //     add2(&mut acc[..], &result_pos.data);
+        //
+        // But with less intermediate copying:
+        for (j, result) in [&r0, &comp1, &comp2, &comp3, &r4].iter().enumerate().rev() {
+            match result.sign() {
+                Plus => add2(&mut acc[i * j..], result.digits()),
+                Minus => sub2(&mut acc[i * j..], result.digits()),
+                NoSign => {}
+            }
+        }
     }
 }
 
 fn mul3(x: &[BigDigit], y: &[BigDigit]) -> BigUint {
-    let len = x.len() + y.len() + 1;
+    let len = x.len() + y.len();
     let mut prod = BigUint { data: vec![0; len] };
 
-    mac3(&mut prod.data[..], x, y);
+    mac3(&mut prod.data, x, y);
     prod.normalized()
 }
 
-fn scalar_mul(a: &mut [BigDigit], b: BigDigit) -> BigDigit {
-    let mut carry = 0;
-    for a in a.iter_mut() {
-        *a = mul_with_carry(*a, b, &mut carry);
+fn scalar_mul(a: &mut BigUint, b: BigDigit) {
+    match b {
+        0 => a.set_zero(),
+        1 => {}
+        _ => {
+            if b.is_power_of_two() {
+                *a <<= b.trailing_zeros();
+            } else {
+                let mut carry = 0;
+                for a in a.data.iter_mut() {
+                    *a = mul_with_carry(*a, b, &mut carry);
+                }
+                if carry != 0 {
+                    a.data.push(carry as BigDigit);
+                }
+            }
+        }
     }
-    carry as BigDigit
 }
 
 fn sub_sign(mut a: &[BigDigit], mut b: &[BigDigit]) -> (Sign, BigUint) {
     // Normalize:
-    a = &a[..a.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)];
-    b = &b[..b.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)];
+    if let Some(&0) = a.last() {
+        a = &a[..a.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)];
+    }
+    if let Some(&0) = b.last() {
+        b = &b[..b.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)];
+    }
 
     match cmp_slice(a, b) {
         Ordering::Greater => {
@@ -356,22 +395,55 @@
     }
 }
 
-forward_all_binop_to_ref_ref!(impl Mul for BigUint, mul);
-forward_val_assign!(impl MulAssign for BigUint, mul_assign);
+macro_rules! impl_mul {
+    ($(impl<$($a:lifetime),*> Mul<$Other:ty> for $Self:ty;)*) => {$(
+        impl<$($a),*> Mul<$Other> for $Self {
+            type Output = BigUint;
 
-impl<'a, 'b> Mul<&'b BigUint> for &'a BigUint {
-    type Output = BigUint;
-
-    #[inline]
-    fn mul(self, other: &BigUint) -> BigUint {
-        mul3(&self.data[..], &other.data[..])
-    }
+            #[inline]
+            fn mul(self, other: $Other) -> BigUint {
+                match (&*self.data, &*other.data) {
+                    // multiply by zero
+                    (&[], _) | (_, &[]) => BigUint::zero(),
+                    // multiply by a scalar
+                    (_, &[digit]) => self * digit,
+                    (&[digit], _) => other * digit,
+                    // full multiplication
+                    (x, y) => mul3(x, y),
+                }
+            }
+        }
+    )*}
 }
-impl<'a> MulAssign<&'a BigUint> for BigUint {
-    #[inline]
-    fn mul_assign(&mut self, other: &'a BigUint) {
-        *self = &*self * other
-    }
+impl_mul! {
+    impl<> Mul<BigUint> for BigUint;
+    impl<'b> Mul<&'b BigUint> for BigUint;
+    impl<'a> Mul<BigUint> for &'a BigUint;
+    impl<'a, 'b> Mul<&'b BigUint> for &'a BigUint;
+}
+
+macro_rules! impl_mul_assign {
+    ($(impl<$($a:lifetime),*> MulAssign<$Other:ty> for BigUint;)*) => {$(
+        impl<$($a),*> MulAssign<$Other> for BigUint {
+            #[inline]
+            fn mul_assign(&mut self, other: $Other) {
+                match (&*self.data, &*other.data) {
+                    // multiply by zero
+                    (&[], _) => {},
+                    (_, &[]) => self.set_zero(),
+                    // multiply by a scalar
+                    (_, &[digit]) => *self *= digit,
+                    (&[digit], _) => *self = other * digit,
+                    // full multiplication
+                    (x, y) => *self = mul3(x, y),
+                }
+            }
+        }
+    )*}
+}
+impl_mul_assign! {
+    impl<> MulAssign<BigUint> for BigUint;
+    impl<'a> MulAssign<&'a BigUint> for BigUint;
 }
 
 promote_unsigned_scalars!(impl Mul for BigUint, mul);
@@ -392,14 +464,7 @@
 impl MulAssign<u32> for BigUint {
     #[inline]
     fn mul_assign(&mut self, other: u32) {
-        if other == 0 {
-            self.data.clear();
-        } else {
-            let carry = scalar_mul(&mut self.data[..], other as BigDigit);
-            if carry != 0 {
-                self.data.push(carry);
-            }
-        }
+        scalar_mul(self, other as BigDigit);
     }
 }
 
@@ -416,27 +481,18 @@
     #[cfg(not(u64_digit))]
     #[inline]
     fn mul_assign(&mut self, other: u64) {
-        if other == 0 {
-            self.data.clear();
-        } else if other <= u64::from(BigDigit::max_value()) {
-            *self *= other as BigDigit
+        if let Some(other) = BigDigit::from_u64(other) {
+            scalar_mul(self, other);
         } else {
             let (hi, lo) = big_digit::from_doublebigdigit(other);
-            *self = mul3(&self.data[..], &[lo, hi])
+            *self = mul3(&self.data, &[lo, hi]);
         }
     }
 
     #[cfg(u64_digit)]
     #[inline]
     fn mul_assign(&mut self, other: u64) {
-        if other == 0 {
-            self.data.clear();
-        } else {
-            let carry = scalar_mul(&mut self.data[..], other as BigDigit);
-            if carry != 0 {
-                self.data.push(carry);
-            }
-        }
+        scalar_mul(self, other);
     }
 }
 
@@ -454,26 +510,25 @@
     #[cfg(not(u64_digit))]
     #[inline]
     fn mul_assign(&mut self, other: u128) {
-        if other == 0 {
-            self.data.clear();
-        } else if other <= u128::from(BigDigit::max_value()) {
-            *self *= other as BigDigit
+        if let Some(other) = BigDigit::from_u128(other) {
+            scalar_mul(self, other);
         } else {
-            let (a, b, c, d) = u32_from_u128(other);
-            *self = mul3(&self.data[..], &[d, c, b, a])
+            *self = match u32_from_u128(other) {
+                (0, 0, c, d) => mul3(&self.data, &[d, c]),
+                (0, b, c, d) => mul3(&self.data, &[d, c, b]),
+                (a, b, c, d) => mul3(&self.data, &[d, c, b, a]),
+            };
         }
     }
 
     #[cfg(u64_digit)]
     #[inline]
     fn mul_assign(&mut self, other: u128) {
-        if other == 0 {
-            self.data.clear();
-        } else if other <= BigDigit::max_value() as u128 {
-            *self *= other as BigDigit
+        if let Some(other) = BigDigit::from_u128(other) {
+            scalar_mul(self, other);
         } else {
             let (hi, lo) = big_digit::from_doublebigdigit(other);
-            *self = mul3(&self.data[..], &[lo, hi])
+            *self = mul3(&self.data, &[lo, hi]);
         }
     }
 }
@@ -502,6 +557,6 @@
     let a_i = BigInt::from(a.clone());
     let b_i = BigInt::from(b.clone());
 
-    assert_eq!(sub_sign_i(&a.data[..], &b.data[..]), &a_i - &b_i);
-    assert_eq!(sub_sign_i(&b.data[..], &a.data[..]), &b_i - &a_i);
+    assert_eq!(sub_sign_i(&a.data, &b.data), &a_i - &b_i);
+    assert_eq!(sub_sign_i(&b.data, &a.data), &b_i - &a_i);
 }
diff --git a/src/biguint/power.rs b/src/biguint/power.rs
index 44b3814..d24651b 100644
--- a/src/biguint/power.rs
+++ b/src/biguint/power.rs
@@ -85,7 +85,7 @@
                     exp >>= 1;
                     base = &base * &base;
                     if exp & 1 == 1 {
-                        acc = &acc * &base;
+                        acc *= &base;
                     }
                 }
                 acc
@@ -185,7 +185,8 @@
         let mut unit = |exp_is_odd| {
             base = &base * &base % modulus;
             if exp_is_odd {
-                acc = &acc * &base % modulus;
+                acc *= &base;
+                acc %= modulus;
             }
         };
 
diff --git a/src/biguint/serde.rs b/src/biguint/serde.rs
index 573b0a7..ed663c6 100644
--- a/src/biguint/serde.rs
+++ b/src/biguint/serde.rs
@@ -89,7 +89,7 @@
         use num_integer::Integer;
 
         let u32_len = seq.size_hint().unwrap_or(0);
-        let len = u32_len.div_ceil(&2);
+        let len = Integer::div_ceil(&u32_len, &2);
         let mut data = Vec::with_capacity(len);
 
         while let Some(lo) = seq.next_element::<u32>()? {
diff --git a/tests/bigint.rs b/tests/bigint.rs
index 8eff5ba..f244bc4 100644
--- a/tests/bigint.rs
+++ b/tests/bigint.rs
@@ -1306,6 +1306,10 @@
     check!(u32);
     check!(u64);
     check!(usize);
+
+    let pow_1e10000 = BigInt::from(10u32).pow(10_000_u32);
+    let manual_1e10000 = repeat(10u32).take(10_000).product::<BigInt>();
+    assert!(manual_1e10000 == pow_1e10000);
 }
 
 #[test]
diff --git a/tests/bigint_scalar.rs b/tests/bigint_scalar.rs
index 485f2c5..10cf8d6 100644
--- a/tests/bigint_scalar.rs
+++ b/tests/bigint_scalar.rs
@@ -1,8 +1,9 @@
 use num_bigint::BigInt;
 use num_bigint::Sign::Plus;
-use num_traits::{Signed, ToPrimitive, Zero};
+use num_traits::{One, Signed, ToPrimitive, Zero};
 
 use std::ops::Neg;
+use std::panic::catch_unwind;
 
 mod consts;
 use crate::consts::*;
@@ -146,3 +147,12 @@
         }
     }
 }
+
+#[test]
+#[ignore = "Android sometimes uses panic_abort"]
+fn test_scalar_div_rem_zero() {
+    catch_unwind(|| BigInt::zero() / 0u32).unwrap_err();
+    catch_unwind(|| BigInt::zero() % 0u32).unwrap_err();
+    catch_unwind(|| BigInt::one() / 0u32).unwrap_err();
+    catch_unwind(|| BigInt::one() % 0u32).unwrap_err();
+}
diff --git a/tests/biguint.rs b/tests/biguint.rs
index 13b69f2..5a5979c 100644
--- a/tests/biguint.rs
+++ b/tests/biguint.rs
@@ -1601,6 +1601,16 @@
 }
 
 #[test]
+fn test_big_str() {
+    for n in 2..=20_u32 {
+        let x: BigUint = BigUint::from(n).pow(10_000_u32);
+        let s = x.to_string();
+        let y: BigUint = s.parse().unwrap();
+        assert_eq!(x, y);
+    }
+}
+
+#[test]
 fn test_lower_hex() {
     let a = BigUint::parse_bytes(b"A", 16).unwrap();
     let hello = BigUint::parse_bytes(b"22405534230753963835153736737", 10).unwrap();
@@ -1776,6 +1786,10 @@
     check!(u64);
     check!(u128);
     check!(usize);
+
+    let pow_1e10000 = BigUint::from(10u32).pow(10_000_u32);
+    let manual_1e10000 = repeat(10u32).take(10_000).product::<BigUint>();
+    assert!(manual_1e10000 == pow_1e10000);
 }
 
 #[test]
diff --git a/tests/biguint_scalar.rs b/tests/biguint_scalar.rs
index b6eadd9..114bbc6 100644
--- a/tests/biguint_scalar.rs
+++ b/tests/biguint_scalar.rs
@@ -1,5 +1,7 @@
 use num_bigint::BigUint;
-use num_traits::{ToPrimitive, Zero};
+use num_traits::{One, ToPrimitive, Zero};
+
+use std::panic::catch_unwind;
 
 mod consts;
 use crate::consts::*;
@@ -111,3 +113,12 @@
         }
     }
 }
+
+#[test]
+#[ignore = "Android sometimes uses panic_abort"]
+fn test_scalar_div_rem_zero() {
+    catch_unwind(|| BigUint::zero() / 0u32).unwrap_err();
+    catch_unwind(|| BigUint::zero() % 0u32).unwrap_err();
+    catch_unwind(|| BigUint::one() / 0u32).unwrap_err();
+    catch_unwind(|| BigUint::one() % 0u32).unwrap_err();
+}