diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 985f6e1..974ca7e 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
 {
   "git": {
-    "sha1": "967ca418c00d0fab0be6baab55a639af69519697"
+    "sha1": "b2b3e0c7eb38ff288d32726594d32a90d0e684fc"
   }
 }
diff --git a/Android.bp b/Android.bp
index e2a16fd..ed6c334 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,5 +1,43 @@
 // This file is generated by cargo2android.py --run --device --dependencies --features=alloc,default,tinyvec_macros.
 
+package {
+    default_applicable_licenses: ["external_rust_crates_tinyvec_license"],
+}
+
+// Added automatically by a large-scale-change that took the approach of
+// 'apply every license found to every target'. While this makes sure we respect
+// every license restriction, it may not be entirely correct.
+//
+// e.g. GPL in an MIT project might only apply to the contrib/ directory.
+//
+// Please consider splitting the single license below into multiple licenses,
+// taking care not to lose any license_kind information, and overriding the
+// default license using the 'licenses: [...]' property on targets as needed.
+//
+// For unused files, consider creating a 'fileGroup' with "//visibility:private"
+// to attach the license to, and including a comment whether the files may be
+// used in the current project.
+//
+// large-scale-change included anything that looked like it might be a license
+// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc.
+//
+// Please consider removing redundant or irrelevant files from 'license_text:'.
+// See: http://go/android-license-faq
+license {
+    name: "external_rust_crates_tinyvec_license",
+    visibility: [":__subpackages__"],
+    license_kinds: [
+        "SPDX-license-identifier-Apache-2.0",
+        "SPDX-license-identifier-MIT",
+        "SPDX-license-identifier-Zlib",
+    ],
+    license_text: [
+        "LICENSE-APACHE.md",
+        "LICENSE-MIT.md",
+        "LICENSE-ZLIB.md",
+    ],
+}
+
 rust_library {
     name: "libtinyvec",
     host_supported: true,
@@ -14,11 +52,6 @@
     rustlibs: [
         "libtinyvec_macros",
     ],
-    apex_available: [
-        "//apex_available:platform",
-        "com.android.resolv",
-    ],
-    min_sdk_version: "29",
 }
 
 // dependent_library ["feature_list"]
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 54dd314..6baa057 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Changelog
 
+## 1.1.1
+
+* [saethlin](https://github.com/saethlin) contributed many PRs (
+  [127](https://github.com/Lokathor/tinyvec/pull/127),
+  [128](https://github.com/Lokathor/tinyvec/pull/128),
+  [129](https://github.com/Lokathor/tinyvec/pull/129),
+  [131](https://github.com/Lokathor/tinyvec/pull/131),
+  [132](https://github.com/Lokathor/tinyvec/pull/132)
+  ) to help in several benchmarks.
+
 ## 1.1.0
 
 * [slightlyoutofphase](https://github.com/slightlyoutofphase)
diff --git a/Cargo.toml b/Cargo.toml
index e3fd556..c7d90dd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@
 [package]
 edition = "2018"
 name = "tinyvec"
-version = "1.1.0"
+version = "1.1.1"
 authors = ["Lokathor <zefria@gmail.com>"]
 description = "`tinyvec` provides 100% safe vec-like data structures."
 keywords = ["vec", "no_std", "no-std"]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 1a27866..ea42ed8 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,7 +1,7 @@
 [package]
 name = "tinyvec"
 description = "`tinyvec` provides 100% safe vec-like data structures."
-version = "1.1.0"
+version = "1.1.1"
 authors = ["Lokathor <zefria@gmail.com>"]
 edition = "2018"
 license = "Zlib OR Apache-2.0 OR MIT"
diff --git a/METADATA b/METADATA
index 73e220a..32fb288 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@
   }
   url {
     type: ARCHIVE
-    value: "https://static.crates.io/crates/tinyvec/tinyvec-1.1.0.crate"
+    value: "https://static.crates.io/crates/tinyvec/tinyvec-1.1.1.crate"
   }
-  version: "1.1.0"
+  version: "1.1.1"
   license_type: NOTICE
   last_upgrade_date {
-    year: 2020
-    month: 12
-    day: 15
+    year: 2021
+    month: 2
+    day: 9
   }
 }
diff --git a/TEST_MAPPING b/TEST_MAPPING
new file mode 100644
index 0000000..ef16d49
--- /dev/null
+++ b/TEST_MAPPING
@@ -0,0 +1,14 @@
+// Generated by update_crate_tests.py for tests that depend on this crate.
+{
+  "presubmit": [
+    {
+      "name": "quiche_device_test_src_lib"
+    },
+    {
+      "name": "url_device_test_src_lib"
+    },
+    {
+      "name": "unicode-normalization_device_test_src_lib"
+    }
+  ]
+}
diff --git a/src/arrayvec.rs b/src/arrayvec.rs
index c744050..55e67db 100644
--- a/src/arrayvec.rs
+++ b/src/arrayvec.rs
@@ -403,10 +403,19 @@
   pub fn fill<I: IntoIterator<Item = A::Item>>(
     &mut self, iter: I,
   ) -> I::IntoIter {
+    // If this is written as a call to push for each element in iter, the
+    // compiler emits code that updates the length for every element. The
+    // additional complexity from that length update is worth nearly 2x in
+    // the runtime of this function.
     let mut iter = iter.into_iter();
-    for element in iter.by_ref().take(self.capacity() - self.len()) {
-      self.push(element);
+    let mut pushed = 0;
+    let to_take = self.capacity() - self.len();
+    let target = &mut self.data.as_slice_mut()[self.len as usize..];
+    for element in iter.by_ref().take(to_take) {
+      target[pushed] = element;
+      pushed += 1;
     }
+    self.len += pushed as u16;
     iter
   }
 
@@ -471,7 +480,9 @@
   /// assert_eq!(av.try_insert(4, "five"), Some("five"));
   /// ```
   #[inline]
-  pub fn try_insert(&mut self, index: usize, item: A::Item) -> Option<A::Item> {
+  pub fn try_insert(
+    &mut self, index: usize, mut item: A::Item,
+  ) -> Option<A::Item> {
     assert!(
       index <= self.len as usize,
       "ArrayVec::try_insert> index {} is out of bounds {}",
@@ -479,11 +490,23 @@
       self.len
     );
 
-    if let Some(x) = self.try_push(item) {
-      return Some(x);
+    // A previous implementation used self.try_push and slice::rotate_right
+    // rotate_right and rotate_left generate a huge amount of code and fail to
+    // inline; calling them here incurs the cost of all the cases they
+    // handle even though we're rotating a usually-small array by a constant
+    // 1 offset. This swap-based implementation benchmarks much better for
+    // small array lengths in particular.
+
+    if (self.len as usize) < A::CAPACITY {
+      self.len += 1;
+    } else {
+      return Some(item);
     }
 
-    self.as_mut_slice()[index..].rotate_right(1);
+    let target = &mut self.as_mut_slice()[index..];
+    for i in 0..target.len() {
+      core::mem::swap(&mut item, &mut target[i]);
+    }
     return None;
   }
 
@@ -601,7 +624,17 @@
   pub fn remove(&mut self, index: usize) -> A::Item {
     let targets: &mut [A::Item] = &mut self.deref_mut()[index..];
     let item = take(&mut targets[0]);
-    targets.rotate_left(1);
+
+    // A previous implementation used rotate_left
+    // rotate_right and rotate_left generate a huge amount of code and fail to
+    // inline; calling them here incurs the cost of all the cases they
+    // handle even though we're rotating a usually-small array by a constant
+    // 1 offset. This swap-based implementation benchmarks much better for
+    // small array lengths in particular.
+
+    for i in 0..targets.len() - 1 {
+      targets.swap(i, i + 1);
+    }
     self.len -= 1;
     item
   }
diff --git a/src/tinyvec.rs b/src/tinyvec.rs
index ed13433..c578ccb 100644
--- a/src/tinyvec.rs
+++ b/src/tinyvec.rs
@@ -676,18 +676,34 @@
   /// tv.push(4);
   /// assert_eq!(tv.as_slice(), &[1, 2, 3, 4]);
   /// ```
-  #[inline(always)]
+  #[inline]
   pub fn push(&mut self, val: A::Item) {
-    let arr = match self {
-      TinyVec::Heap(v) => return v.push(val),
-      TinyVec::Inline(a) => a,
-    };
-
-    if let Some(x) = arr.try_push(val) {
+    // The code path for moving the inline contents to the heap produces a lot
+    // of instructions, but we have a strong guarantee that this is a cold
+    // path. LLVM doesn't know this, inlines it, and this tends to cause a
+    // cascade of other bad inlining decisions because the body of push looks
+    // huge even though nearly every call executes the same few instructions.
+    //
+    // Moving the logic out of line with #[cold] causes the hot code to  be
+    // inlined together, and we take the extra cost of a function call only
+    // in rare cases.
+    #[cold]
+    fn drain_to_heap_and_push<A: Array>(
+      arr: &mut ArrayVec<A>, val: A::Item,
+    ) -> TinyVec<A> {
       /* Make the Vec twice the size to amortize the cost of draining */
       let mut v = arr.drain_to_vec_and_reserve(arr.len());
-      v.push(x);
-      *self = TinyVec::Heap(v);
+      v.push(val);
+      TinyVec::Heap(v)
+    }
+
+    match self {
+      TinyVec::Heap(v) => v.push(val),
+      TinyVec::Inline(arr) => {
+        if let Some(x) = arr.try_push(val) {
+          *self = drain_to_heap_and_push(arr, x);
+        }
+      }
     }
   }
 
@@ -1102,8 +1118,15 @@
       TinyVec::Heap(slice.into())
     } else {
       let mut arr = ArrayVec::new();
-      arr.extend_from_slice(slice);
-
+      // We do not use ArrayVec::extend_from_slice, because it looks like LLVM
+      // fails to deduplicate all the length-checking logic between the
+      // above if and the contents of that method, thus producing much
+      // slower code. Unlike many of the other optimizations in this
+      // crate, this one is worth keeping an eye on. I see no reason, for
+      // any element type, that these should produce different code. But
+      // they do. (rustc 1.51.0)
+      arr.set_len(slice.len());
+      arr.as_mut_slice().clone_from_slice(slice);
       TinyVec::Inline(arr)
     }
   }
