ANDROID: 16K: Fix show maps CFI failure am: 626e5dce00

Original change: https://android-review.googlesource.com/c/kernel/common/+/3070751

Change-Id: I2a3d0390adb8bc90ba59a9f8f2f3d2ad449efc4c
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/OWNERS b/OWNERS
index 414594e..2efc343 100644
--- a/OWNERS
+++ b/OWNERS
@@ -1,12 +1,6 @@
 set noparent
-
-# GKI Dr. No Enforcement is active on this branch. Approval of one of the Dr.
-# No reviewers is required following a regular CodeReview+2 vote of a code
-# reviewer.
-#
-# See the GKI release documentation (go/gki-dr-no) for further details.
-#
-# The expanded list of reviewers can be found at:
-# https://android.googlesource.com/kernel/common/+/android-mainline/OWNERS_DrNo
-
-include kernel/common:android-mainline:/OWNERS_DrNo
+adelva@google.com
+vnagarnaik@google.com
+udam@google.com
+tomcherry@google.com
+srichman@google.com
diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig
index 986af3f..84436bee 100644
--- a/arch/x86/configs/gki_defconfig
+++ b/arch/x86/configs/gki_defconfig
@@ -255,9 +255,11 @@
 CONFIG_NFC=y
 CONFIG_PCI=y
 CONFIG_PCIEPORTBUS=y
+CONFIG_HOTPLUG_PCI_PCIE=y
 CONFIG_PCIEAER=y
 CONFIG_PCI_MSI=y
 CONFIG_PCI_IOV=y
+CONFIG_HOTPLUG_PCI=y
 CONFIG_PCIE_DW_PLAT_EP=y
 CONFIG_PCI_ENDPOINT=y
 CONFIG_FW_LOADER_USER_HELPER=y
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 2b5e04c..2e544ec 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -39,6 +39,7 @@
 #undef EMIT_VVAR
 
 unsigned int vclocks_used __read_mostly;
+EXPORT_SYMBOL_GPL(vclocks_used);
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 4e1757b..8190135 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -4,7 +4,7 @@
 
 #include <uapi/asm/setup.h>
 
-#define COMMAND_LINE_SIZE 2048
+#define COMMAND_LINE_SIZE 4096
 
 #include <linux/linkage.h>
 #include <asm/page_types.h>
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 11065dc..91a12b3 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -109,6 +109,7 @@
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(pvclock_clocksource_read);
 
 void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
 			    struct pvclock_vcpu_time_info *vcpu_time,
@@ -148,6 +149,7 @@
 	WARN_ON(vclock_was_used(VDSO_CLOCKMODE_PVCLOCK));
 	pvti_cpu0_va = pvti;
 }
+EXPORT_SYMBOL_GPL(pvclock_set_pvti_cpu0_va);
 
 struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void)
 {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98838b7..5bdccf5 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -70,6 +70,8 @@
 #include <asm/proto.h>
 #endif
 
+extern bool kiwi_fault_logging;
+
 DECLARE_BITMAP(system_vectors, NR_VECTORS);
 
 static inline void cond_local_irq_enable(struct pt_regs *regs)
@@ -169,6 +171,11 @@
 static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
 	unsigned long trapnr, int signr, int sicode, void __user *addr)
 {
+	if (unlikely(kiwi_fault_logging)) { /* opt-in via kiwi_fault_logging=on */
+		printk(KERN_ALERT "%s[%d]: do_error_trap for %s at %lx trapnr %lx signr %x sicode %x ip %px sp %px error %lx\n",
+		       current->comm, task_pid_nr(current), str, (unsigned long)addr, trapnr,
+		       signr, sicode, (void *)regs->ip, (void *)regs->sp, error_code);
+	}
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
@@ -564,6 +571,12 @@
 
 	cond_local_irq_enable(regs);
 
+	if (unlikely(kiwi_fault_logging)) {
+	    printk(KERN_ALERT "%s[%d]: exc_general_protection ip %px sp %px error %lx",
+			current->comm, task_pid_nr(current),
+			(void *)regs->ip, (void *)regs->sp, error_code);
+	}
+
 	if (static_cpu_has(X86_FEATURE_UMIP)) {
 		if (user_mode(regs) && fixup_umip_exception(regs))
 			goto exit;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e9afbf8..b78429e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1446,6 +1446,26 @@
 		trace_page_fault_kernel(address, regs, error_code);
 }
 
+bool kiwi_fault_logging = false;
+
+/*
+ * kiwi_fault_logging=on|off
+ * Enables detailed fault logs. Any other value keeps the current setting.
+ *
+ * on	Enable
+ * off	Disable (default)
+ */
+static int __init parse_kiwi_fault_logging(char *str)
+{
+	if (!strcmp(str, "on"))
+		kiwi_fault_logging = true;
+	else if (!strcmp(str, "off"))
+		kiwi_fault_logging = false;
+	/* Returning 1 marks the option as handled so it is not passed on to init. */
+	return 1;
+}
+__setup("kiwi_fault_logging=", parse_kiwi_fault_logging);
+
 static __always_inline void
 handle_page_fault(struct pt_regs *regs, unsigned long error_code,
 			      unsigned long address)
@@ -1476,6 +1496,12 @@
 	unsigned long address = read_cr2();
 	irqentry_state_t state;
 
+	if (unlikely(kiwi_fault_logging)) {
+	    printk(KERN_ALERT "%s[%d]: exc_page_fault at %lx ip %px sp %px error %lx",
+			current->comm, task_pid_nr(current), address,
+			(void *)regs->ip, (void *)regs->sp, error_code);
+	}
+
 	prefetchw(&current->mm->mmap_lock);
 
 	/*
diff --git a/build.config.kiwi.x86_64 b/build.config.kiwi.x86_64
new file mode 100644
index 0000000..89fb7a3
--- /dev/null
+++ b/build.config.kiwi.x86_64
@@ -0,0 +1,11 @@
+. ${ROOT_DIR}/common-modules/virtual-device/build.config.virtual_device
+
+. ${ROOT_DIR}/${KERNEL_DIR}/build.config.x86_64
+
+DEFCONFIG=kiwi_x86_64_gki_defconfig
+PRE_DEFCONFIG_CMDS="KCONFIG_CONFIG=${ROOT_DIR}/${KERNEL_DIR}/arch/x86/configs/${DEFCONFIG} ${ROOT_DIR}/${KERNEL_DIR}/scripts/kconfig/merge_config.sh -m -r ${ROOT_DIR}/${KERNEL_DIR}/arch/x86/configs/gki_defconfig ${ROOT_DIR}/common-modules/virtual-device/virtual_device.fragment ${ROOT_DIR}/${KERNEL_DIR}/kiwi.fragment"
+POST_DEFCONFIG_CMDS="rm ${ROOT_DIR}/${KERNEL_DIR}/arch/x86/configs/${DEFCONFIG}"
+BUILD_VIRTIO_DXGKRNL=m
+
+# Not saving any kernel images. This build step is meant purely to generate the .kos.
+FILES=""
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 3ef5544..113b36e 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -139,4 +139,14 @@
 	 This option adds a flavor of dma buffers that are backed by
 	 virtio resources.
 
+config VIRTIO_PVCLOCK
+	tristate "Virtio pvclock driver"
+	depends on VIRTIO
+	depends on X86 && HYPERVISOR_GUEST
+	select PARAVIRT_CLOCK
+	help
+	 This driver supports virtio pvclock devices.
+
+	 If unsure, say M.
+
 endif # VIRTIO_MENU
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 591e6f72..e29be13 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -9,3 +9,4 @@
 obj-$(CONFIG_VIRTIO_VDPA) += virtio_vdpa.o
 obj-$(CONFIG_VIRTIO_MEM) += virtio_mem.o
 obj-$(CONFIG_VIRTIO_DMA_SHARED_BUFFER) += virtio_dma_buf.o
+obj-$(CONFIG_VIRTIO_PVCLOCK) += virtio_pvclock.o
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 3271822..107d327 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -6,6 +6,7 @@
  *  Copyright 2008 Rusty Russell IBM Corporation
  */
 
+#include "linux/dev_printk.h"
 #include <linux/virtio.h>
 #include <linux/virtio_balloon.h>
 #include <linux/swap.h>
@@ -26,8 +27,14 @@
  * Balloon device works in 4K page units.  So each page is pointed to by
  * multiple balloon pages.  All memory counters in this driver are in balloon
  * page units.
+ *
+ * With hugepage allocation, we need to treat 1 page == 1 balloon page at least
+ * for x86 which is the current prototype target.
  */
+#define VIRTIO_BALLOON_PAGES_PER_PAGE 1
+/*
 #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
+*/
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
 /* Maximum number of (4k) pages to deflate on OOM notifications. */
 #define VIRTIO_BALLOON_OOM_NR_PAGES 256
@@ -126,6 +133,9 @@
 	/* Free page reporting device */
 	struct virtqueue *reporting_vq;
 	struct page_reporting_dev_info pr_dev_info;
+
+	/* order to use for hugepage allocation, 0 => 4k, 1 => 8k, 2 => 16k, etc. */
+	unsigned int hugepage_order;
 };
 
 static const struct virtio_device_id id_table[] = {
@@ -204,8 +214,9 @@
 	 * Note that the first pfn points at start of the page.
 	 */
 	for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
-		pfns[i] = cpu_to_virtio32(vb->vdev,
-					  page_to_balloon_pfn(page) + i);
+		pfns[i] = cpu_to_virtio32(
+			vb->vdev,
+			(page_to_balloon_pfn(page) >> vb->hugepage_order) + i);
 }
 
 static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
@@ -220,7 +231,7 @@
 
 	for (num_pfns = 0; num_pfns < num;
 	     num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
-		struct page *page = balloon_page_alloc();
+		struct page *page = balloon_page_alloc(vb->hugepage_order);
 
 		if (!page) {
 			dev_info_ratelimited(&vb->vdev->dev,
@@ -245,7 +256,7 @@
 		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
 		if (!virtio_has_feature(vb->vdev,
 					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
-			adjust_managed_page_count(page, -1);
+			adjust_managed_page_count(page, -(1 << vb->hugepage_order));
 		vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE;
 	}
 
@@ -266,7 +277,7 @@
 	list_for_each_entry_safe(page, next, pages, lru) {
 		if (!virtio_has_feature(vb->vdev,
 					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
-			adjust_managed_page_count(page, 1);
+			adjust_managed_page_count(page, (1 << vb->hugepage_order));
 		list_del(&page->lru);
 		put_page(page); /* balloon reference */
 	}
@@ -476,17 +487,14 @@
 	stats_handle_request(vb);
 }
 
-static void update_balloon_size_func(struct work_struct *work)
+static s64 update_balloon_size_internal(struct virtio_balloon *vb)
 {
-	struct virtio_balloon *vb;
 	s64 diff;
 
-	vb = container_of(work, struct virtio_balloon,
-			  update_balloon_size_work);
 	diff = towards_target(vb);
 
 	if (!diff)
-		return;
+		return diff;
 
 	if (diff > 0)
 		diff -= fill_balloon(vb, diff);
@@ -494,7 +502,17 @@
 		diff += leak_balloon(vb, -diff);
 	update_balloon_size(vb);
 
-	if (diff)
+	return diff;
+}
+
+static void update_balloon_size_func(struct work_struct *work)
+{
+	/* The work item is embedded in the balloon it belongs to. */
+	struct virtio_balloon *vb = container_of(work, struct virtio_balloon,
+						 update_balloon_size_work);
+
+	/* Requeue for as long as a non-zero difference is reported back. */
+	if (update_balloon_size_internal(vb))
 		queue_work(system_freezable_wq, work);
 }
 
@@ -905,6 +923,10 @@
 	mutex_init(&vb->balloon_lock);
 	init_waitqueue_head(&vb->acked);
 	vb->vdev = vdev;
+	/* Set the hugepage_order provided by the hypervisor */
+	virtio_cread_le(vb->vdev, struct virtio_balloon_config, hugepage_order,
+			&vb->hugepage_order);
+	dev_info_ratelimited(&vdev->dev, "allocation using order=%d", vb->hugepage_order);
 
 	balloon_devinfo_init(&vb->vb_dev_info);
 
@@ -1004,8 +1026,23 @@
 
 	virtio_device_ready(vdev);
 
-	if (towards_target(vb))
-		virtballoon_changed(vdev);
+	if (towards_target(vb)) {
+		s64 diff = -1;
+		do {
+			s64 new_diff = update_balloon_size_internal(vb);
+			/* The BUG below will be triggered if the balloon couldn't make any
+			 * progress with the initial inflation.
+			 * Explicit crash because there shouldn't be any failure at this stage.
+			 * The reasons this BUG could trigger if:
+			 *  * the initial target is too aggressive
+			 *  * virtio-balloon is not being loaded at the 1st stage init
+			 * Both are considered programming bugs and should be fixed.
+			 */
+			BUG_ON(new_diff == diff);
+			diff = new_diff;
+		} while (diff != 0);
+	}
+	dev_info_ratelimited(&vdev->dev, "initial allocation done");
 	return 0;
 
 out_unregister_oom:
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
index 427d58c..be83944 100644
--- a/drivers/virtio/virtio_input.c
+++ b/drivers/virtio/virtio_input.c
@@ -9,6 +9,8 @@
 #include <uapi/linux/virtio_ids.h>
 #include <uapi/linux/virtio_input.h>
 
+#define RUMBLE_EFFECT_TYPE 0xFFFF
+
 struct virtio_input {
 	struct virtio_device       *vdev;
 	struct input_dev           *idev;
@@ -209,12 +211,70 @@
 	spin_unlock_irqrestore(&vi->lock, flags);
 }
 
+/* ff->upload hook: forward a rumble effect's two magnitudes to the device. */
+static int virtinput_ff_upload(struct input_dev *dev, struct ff_effect *effect,
+			       struct ff_effect *old)
+{
+	struct virtio_input *vi = input_get_drvdata(dev);
+	s32 value;
+	int ret;
+
+	if (effect->type != FF_RUMBLE) {
+		/* Other effect types are reported but still accepted (returns 0). */
+		dev_warn(&dev->dev,
+			 "virtinput_ff_upload did not receive effect type FF_RUMBLE. Instead received event type: %d\n",
+			 effect->type);
+		return 0;
+	}
+	pr_debug("virtinput_ff_upload FF_RUMBLE strong: %d, weak: %d\n",
+		 effect->u.rumble.strong_magnitude, effect->u.rumble.weak_magnitude);
+	/* Shift as u32: the promoted int would overflow for magnitudes >= 0x8000. */
+	value = (s32)((u32)effect->u.rumble.strong_magnitude << 16 |
+		      effect->u.rumble.weak_magnitude);
+	ret = virtinput_send_status(vi, RUMBLE_EFFECT_TYPE, effect->id, value);
+	if (ret)
+		dev_err(&dev->dev, "virtinput_ff_upload virtinput_send_status error: %d\n", ret);
+	return ret;
+}
+
+static int virtinput_ff_erase(struct input_dev *dev, int effect_id) {
+	dev_info(&dev->dev, "virtinput_ff_erase called with effect_id: %d\n", effect_id);
+	return 0; /* erase is only logged; nothing is sent to the device */
+}
+
+/* ff->playback hook: relay effect_id and value to the device as an EV_FF status event. */
+static int virtinput_ff_playback(struct input_dev *dev, int effect_id, int value)
+{
+	struct virtio_input *vi = input_get_drvdata(dev);
+	int ret;
+
+	pr_debug("virtinput_ff_playback called with effect_id: %d, value: %d\n", effect_id, value);
+	ret = virtinput_send_status(vi, EV_FF, effect_id, value);
+	if (ret) /* report failures at error level, as virtinput_ff_upload() does */
+		dev_err(&dev->dev, "virtinput_ff_playback virtinput_send_status error: %d\n", ret);
+	return ret;
+}
+
+static void virtinput_ff_set_gain(struct input_dev *dev, u16 gain) { /* ff->set_gain: log only */
+	dev_info(&dev->dev, "virtinput_ff_set_gain called with gain: %d\n", gain);
+}
+
+static void virtinput_ff_set_autocenter(struct input_dev *dev, u16 magnitude) { /* log only */
+	dev_info(&dev->dev, "virtinput_ff_set_autocenter called with magnitude: %d\n", magnitude);
+}
+
+static void virtinput_ff_destroy(struct ff_device *ff) { /* ff->destroy: nothing to free, log only */
+	printk(KERN_INFO "virtinput_ff_destroy called\n");
+}
+
 static int virtinput_probe(struct virtio_device *vdev)
 {
 	struct virtio_input *vi;
 	unsigned long flags;
 	size_t size;
 	int abs, err;
+        int error;
+        struct ff_device *ff;
 
 	if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
 		return -ENODEV;
@@ -292,6 +352,8 @@
 			   vi->idev->ledbit, LED_CNT);
 	virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_SND,
 			   vi->idev->sndbit, SND_CNT);
+	virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_FF,
+			   vi->idev->ffbit, FF_CNT);
 
 	if (test_bit(EV_ABS, vi->idev->evbit)) {
 		for (abs = 0; abs < ABS_CNT; abs++) {
@@ -301,6 +363,25 @@
 		}
 	}
 
+        if (test_bit(FF_RUMBLE, vi->idev->ffbit)) {
+          dev_info(&vi->idev->dev, "Creating FF device for %s", vi->idev->name);
+          error = input_ff_create(vi->idev, FF_MAX_EFFECTS);
+
+          if (!error) {
+            ff = vi->idev->ff;
+
+            ff->upload = virtinput_ff_upload;
+            ff->erase = virtinput_ff_erase;
+            ff->playback = virtinput_ff_playback;
+            ff->set_gain = virtinput_ff_set_gain;
+            ff->set_autocenter = virtinput_ff_set_autocenter;
+            ff->destroy = virtinput_ff_destroy;
+          } else {
+            dev_err(&vi->idev->dev, "input_ff_create error: %d. Won't create ff device.", error);
+          }
+        }
+
+
 	virtio_device_ready(vdev);
 	vi->ready = true;
 	err = input_register_device(vi->idev);
diff --git a/drivers/virtio/virtio_pvclock.c b/drivers/virtio/virtio_pvclock.c
new file mode 100644
index 0000000..7d6fd0b
--- /dev/null
+++ b/drivers/virtio/virtio_pvclock.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Virtio pvclock implementation.
+ *
+ *  Copyright (C) 2021 Google, Inc.
+ */
+
+#include <linux/clocksource.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/virtio.h>
+#include <linux/virtio_pvclock.h>
+#include <linux/workqueue.h>
+#include <asm/pvclock.h>
+
+enum virtio_pvclock_vq {
+	VIRTIO_PVCLOCK_VQ_SET_PVCLOCK_PAGE, /* index of the set_pvclock_page virtqueue */
+	VIRTIO_PVCLOCK_VQ_MAX /* number of virtqueues; sizes the arrays in init_vqs() */
+};
+
+struct virtio_pvclock {
+	struct virtio_device *vdev; /* owning virtio device, set in probe */
+	struct virtqueue *set_pvclock_page_vq; /* obtained from find_vqs() in init_vqs() */
+	struct virtio_pvclock_set_pvclock_page_req set_page_request; /* buffer queued on that vq */
+
+	/* Runs update_suspend_time(); scheduled from the config-changed callback. */
+	struct work_struct update_suspend_time_work;
+	/* Runs create_clocksource(); scheduled from the virtqueue callback. */
+	struct work_struct create_clocksource_work;
+
+	/* Synchronize access/update to injected_suspend_ns. */
+	struct mutex inject_suspend_lock;
+	/* Total ns injected as sleep time. */
+	u64 injected_suspend_ns;
+
+	/* DMA address of virtio_pvclock_page. */
+	dma_addr_t pvclock_page_dma_addr;
+};
+
+/* CPU accessible pointer to pvclock page. */
+static struct pvclock_vsyscall_time_info *virtio_pvclock_page;
+
+static const struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_PVCLOCK, VIRTIO_DEV_ANY_ID }, /* any vendor */
+	{ 0 }, /* terminator */
+};
+
+/* Work handler: inject suspend time newly reported by the device into timekeeping. */
+static void update_suspend_time(struct work_struct *work)
+{
+	u64 suspend_ns, suspend_time_delta = 0;
+	struct timespec64 inject_time;
+	struct virtio_pvclock *vp;
+
+	vp = container_of(work, struct virtio_pvclock,
+			  update_suspend_time_work);
+	virtio_cread(vp->vdev, struct virtio_pvclock_config, suspend_time_ns,
+		     &suspend_ns);
+
+	/* Only the part of the device total that was not injected before is new. */
+	mutex_lock(&vp->inject_suspend_lock);
+	if (suspend_ns > vp->injected_suspend_ns) {
+		suspend_time_delta = suspend_ns - vp->injected_suspend_ns;
+		vp->injected_suspend_ns = suspend_ns;
+	}
+	mutex_unlock(&vp->inject_suspend_lock);
+
+	if (suspend_time_delta == 0) {
+		dev_err(&vp->vdev->dev,
+			"%s: suspend_time_ns is less than injected_suspend_ns\n",
+			__func__);
+		return;
+	}
+
+	inject_time = ns_to_timespec64(suspend_time_delta);
+	timekeeping_inject_sleeptime64(&inject_time);
+
+	dev_info(&vp->vdev->dev, "injected sleeptime: %llu ns\n",
+		 suspend_time_delta);
+}
+
+static u64 virtio_pvclock_clocksource_read(struct clocksource *cs)
+{
+	u64 cycles;
+
+	preempt_disable_notrace(); /* sample the pvclock page without being preempted */
+	cycles = pvclock_clocksource_read(&virtio_pvclock_page->pvti);
+	preempt_enable_notrace();
+	return cycles;
+}
+
+static int virtio_pvclock_cs_enable(struct clocksource *cs)
+{
+	if (cs->vdso_clock_mode == VDSO_CLOCKMODE_PVCLOCK) /* pvclock vDSO mode selected */
+		vclocks_set_used(cs->vdso_clock_mode);
+	return 0;
+}
+
+static struct clocksource virtio_pvclock_clocksource = {
+	.name = "virtio-pvclock",
+	.read = virtio_pvclock_clocksource_read,
+	.enable = virtio_pvclock_cs_enable,
+	.mask = CLOCKSOURCE_MASK(64),
+	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+	.rating = 200, /* default; virtpvclock_validate() may override it */
+};
+
+static void set_pvclock_page_callback(struct virtqueue *vq)
+{
+	struct virtio_device *vdev = vq->vdev;
+	struct virtio_pvclock *vp = vdev->priv;
+	u8 status = vp->set_page_request.status;
+
+	/*
+	 * This runs in an interrupt handler, so the clocksource itself is
+	 * created from a work item once the request status is OK.
+	 */
+	if (status == VIRTIO_PVCLOCK_S_OK) {
+		schedule_work(&vp->create_clocksource_work);
+		return;
+	}
+	dev_err(&vdev->dev, "%s: set_pvclock_page req status is %u\n", __func__,
+		status);
+}
+
+static void create_clocksource(struct work_struct *work)
+{
+	struct virtio_pvclock *vp = container_of(work, struct virtio_pvclock,
+						 create_clocksource_work);
+	static bool registered;
+
+	/* Restore re-sends the page request; never register the clocksource twice. */
+	if (registered)
+		return;
+	/* vDSO pvclock needs stable TSCs and PVCLOCK_TSC_STABLE_BIT in the pvclock flags. */
+	if (virtio_has_feature(vp->vdev, VIRTIO_PVCLOCK_F_TSC_STABLE)) {
+		pvclock_set_pvti_cpu0_va(virtio_pvclock_page);
+		virtio_pvclock_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
+	}
+	registered = !clocksource_register_hz(&virtio_pvclock_clocksource, NSEC_PER_SEC);
+	if (registered)
+		dev_info(&vp->vdev->dev, "registered clocksource\n");
+	else
+		dev_err(&vp->vdev->dev, "failed to register clocksource\n");
+}
+
+static void virtpvclock_changed(struct virtio_device *vdev)
+{
+	struct virtio_pvclock *pvclock = vdev->priv;
+
+	schedule_work(&pvclock->update_suspend_time_work); /* runs update_suspend_time() */
+}
+
+/* Fill in the set-page request and queue it on the set_pvclock_page virtqueue. */
+static int set_pvclock_page(struct virtio_pvclock *vp)
+{
+	struct virtio_pvclock_set_pvclock_page_req *req = &vp->set_page_request;
+	struct scatterlist sg;
+	int err;
+
+	req->pvclock_page_pa = vp->pvclock_page_dma_addr;
+	req->system_time = ktime_get();
+	req->tsc_timestamp = rdtsc_ordered();
+	sg_init_one(&sg, req, sizeof(*req));
+
+	err = virtqueue_add_outbuf(vp->set_pvclock_page_vq, &sg, 1, vp,
+				   GFP_KERNEL);
+	if (!err) {
+		virtqueue_kick(vp->set_pvclock_page_vq);
+		return 0;
+	}
+	dev_err(&vp->vdev->dev, "%s: failed to add output\n", __func__);
+	return err;
+}
+
+static int init_vqs(struct virtio_pvclock *vp)
+{
+	vq_callback_t *callbacks[VIRTIO_PVCLOCK_VQ_MAX];
+	struct virtqueue *vqs[VIRTIO_PVCLOCK_VQ_MAX];
+	const char *names[VIRTIO_PVCLOCK_VQ_MAX];
+	int err;
+
+	callbacks[VIRTIO_PVCLOCK_VQ_SET_PVCLOCK_PAGE] = set_pvclock_page_callback;
+	names[VIRTIO_PVCLOCK_VQ_SET_PVCLOCK_PAGE] = "set_pvclock_page";
+	err = vp->vdev->config->find_vqs(vp->vdev, VIRTIO_PVCLOCK_VQ_MAX, vqs,
+					 callbacks, names, NULL, NULL);
+	if (err)
+		return err;
+
+	vp->set_pvclock_page_vq = vqs[VIRTIO_PVCLOCK_VQ_SET_PVCLOCK_PAGE];
+	err = set_pvclock_page(vp);
+	if (err) /* callers only undo their own allocations, so drop the vqs here */
+		vp->vdev->config->del_vqs(vp->vdev);
+	return err;
+}
+
+/*
+ * Probe: allocate the driver state and the shared pvclock page, set up the
+ * virtqueue through init_vqs() and finally mark the device ready.
+ */
+static int virtpvclock_probe(struct virtio_device *vdev)
+{
+	struct device *dma_dev = vdev->dev.parent;
+	struct virtio_pvclock *vp;
+	int err;
+
+	if (!vdev->config->get) {
+		dev_err(&vdev->dev, "%s: config access disabled\n", __func__);
+		return -EINVAL;
+	}
+
+	vp = kzalloc(sizeof(*vp), GFP_KERNEL);
+	if (!vp)
+		return -ENOMEM;
+
+	virtio_pvclock_page = dma_alloc_coherent(dma_dev,
+						 sizeof(*virtio_pvclock_page),
+						 &vp->pvclock_page_dma_addr,
+						 GFP_KERNEL);
+	if (!virtio_pvclock_page) {
+		err = -ENOMEM;
+		goto out_free_vp;
+	}
+
+	/* Initialised ahead of init_vqs(), which installs the virtqueue callback. */
+	mutex_init(&vp->inject_suspend_lock);
+	INIT_WORK(&vp->update_suspend_time_work, update_suspend_time);
+	INIT_WORK(&vp->create_clocksource_work, create_clocksource);
+
+	vp->vdev = vdev;
+	vdev->priv = vp;
+
+	err = init_vqs(vp);
+	if (err)
+		goto out_free_pvclock_page;
+
+	virtio_device_ready(vdev);
+	return 0;
+
+out_free_pvclock_page:
+	dma_free_coherent(dma_dev, sizeof(*virtio_pvclock_page),
+			  virtio_pvclock_page, vp->pvclock_page_dma_addr);
+out_free_vp:
+	kfree(vp);
+	return err;
+}
+
+static void remove_common(struct virtio_pvclock *vp)
+{
+	struct virtio_device *vdev = vp->vdev;
+
+	vdev->config->reset(vdev); /* reset first so the queues can be cleaned up */
+	vdev->config->del_vqs(vdev);
+}
+
+static void virtpvclock_remove(struct virtio_device *vdev)
+{
+	struct virtio_pvclock *vp = vdev->priv;
+
+	remove_common(vp);
+	cancel_work_sync(&vp->update_suspend_time_work); /* both handlers dereference vp */
+	cancel_work_sync(&vp->create_clocksource_work);
+	dma_free_coherent(vdev->dev.parent, sizeof(*virtio_pvclock_page),
+			  virtio_pvclock_page, vp->pvclock_page_dma_addr);
+	kfree(vp);
+}
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * Freeze hook: reset the device and drop its virtqueues. Assumes the PM core
+ * has frozen the workqueue by now - TODO confirm for schedule_work() users.
+ */
+static int virtpvclock_freeze(struct virtio_device *vdev)
+{
+	struct virtio_pvclock *pvclock = vdev->priv;
+
+	remove_common(pvclock);
+	return 0;
+}
+
+/*
+ * Restore hook: rerun init_vqs() and mark the device ready if that succeeded.
+ */
+static int virtpvclock_restore(struct virtio_device *vdev)
+{
+	struct virtio_pvclock *pvclock = vdev->priv;
+	int err = init_vqs(pvclock);
+
+	if (!err)
+		virtio_device_ready(vdev);
+	return err;
+}
+#endif
+
+#define MAX_CLOCKSOURCE_RATING 450
+
+static int virtpvclock_validate(struct virtio_device *vdev)
+{
+	uint32_t rating;
+
+	if (!virtio_has_feature(vdev, VIRTIO_PVCLOCK_F_CLOCKSOURCE_RATING))
+		return 0;
+
+	rating = virtio_cread32(vdev, offsetof(struct virtio_pvclock_config,
+					       clocksource_rating));
+	if (rating <= MAX_CLOCKSOURCE_RATING) {
+		dev_info(&vdev->dev, "clocksource rating set to %u\n", rating);
+		virtio_pvclock_clocksource.rating = (int)rating;
+		return 0;
+	}
+
+	/* Suggested rating is above the cap: clear the feature and use the cap. */
+	dev_warn(&vdev->dev,
+		 "device clocksource rating too high: %u, using max rating: %u\n",
+		 rating, MAX_CLOCKSOURCE_RATING);
+	__virtio_clear_bit(vdev, VIRTIO_PVCLOCK_F_CLOCKSOURCE_RATING);
+	virtio_pvclock_clocksource.rating = (int)MAX_CLOCKSOURCE_RATING;
+	return 0;
+}
+
+static const unsigned int features[] = { VIRTIO_PVCLOCK_F_TSC_STABLE,
+					 VIRTIO_PVCLOCK_F_INJECT_SLEEP,
+					 VIRTIO_PVCLOCK_F_CLOCKSOURCE_RATING };
+
+static struct virtio_driver virtio_pvclock_driver = {
+	.feature_table = features, /* feature bits this driver lists as supported */
+	.feature_table_size = ARRAY_SIZE(features),
+	.driver.name = KBUILD_MODNAME,
+	.driver.owner = THIS_MODULE,
+	.id_table = id_table, /* matches VIRTIO_ID_PVCLOCK devices */
+	.validate = virtpvclock_validate, /* picks the clocksource rating */
+	.probe = virtpvclock_probe,
+	.remove = virtpvclock_remove,
+	.config_changed = virtpvclock_changed, /* schedules update_suspend_time() */
+#ifdef CONFIG_PM_SLEEP
+	.freeze = virtpvclock_freeze,
+	.restore = virtpvclock_restore,
+#endif
+};
+
+module_virtio_driver(virtio_pvclock_driver);
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio pvclock driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index 338aa27..b09d46c 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -60,7 +60,7 @@
 	struct inode *inode;
 };
 
-extern struct page *balloon_page_alloc(void);
+extern struct page *balloon_page_alloc(int order);
 extern void balloon_page_enqueue(struct balloon_dev_info *b_dev_info,
 				 struct page *page);
 extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index fbed5dd..c03c055 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -41,16 +41,17 @@
 #define ___GFP_ACCOUNT		0x400000u
 #define ___GFP_ZEROTAGS		0x800000u
 #define ___GFP_SKIP_KASAN_POISON	0x1000000u
+#define ___GFP_NO_INIT_ON_ALLOC	 0x2000000u
 #ifdef CONFIG_CMA
-#define ___GFP_CMA		0x2000000u
+#define ___GFP_CMA		0x4000000u
 #else
 #define ___GFP_CMA		0
 #endif
 #ifdef CONFIG_LOCKDEP
 #ifdef CONFIG_CMA
-#define ___GFP_NOLOCKDEP	0x4000000u
+#define ___GFP_NOLOCKDEP	0x8000000u
 #else
-#define ___GFP_NOLOCKDEP	0x2000000u
+#define ___GFP_NOLOCKDEP	0x4000000u
 #endif
 #else
 #define ___GFP_NOLOCKDEP	0
@@ -235,12 +236,17 @@
  * %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned
  * on deallocation. Typically used for userspace pages. Currently only has an
  * effect in HW tags mode.
+ *
+ * %__GFP_NO_INIT_ON_ALLOC skips zeroing a page if `init_on_alloc` is set.
+ * Typically used to reduce performance hit for really large allocations that
+ * will not be accessed. e.g. for virtio-balloon allocations when inflating.
  */
 #define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)
 #define __GFP_COMP	((__force gfp_t)___GFP_COMP)
 #define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)
 #define __GFP_ZEROTAGS	((__force gfp_t)___GFP_ZEROTAGS)
 #define __GFP_SKIP_KASAN_POISON	((__force gfp_t)___GFP_SKIP_KASAN_POISON)
+#define __GFP_NO_INIT_ON_ALLOC	((__force gfp_t)___GFP_NO_INIT_ON_ALLOC)
 
 /* Disable lockdep for GFP context tracking */
 #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dfefcfa..d376fde 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3095,7 +3095,7 @@
 static inline bool want_init_on_alloc(gfp_t flags)
 {
 	if (static_branch_unlikely(&init_on_alloc))
-		return true;
+		return !(flags & __GFP_NO_INIT_ON_ALLOC) || (flags & __GFP_ZERO);
 	return flags & __GFP_ZERO;
 }
 
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index a26dbef..9631bfb 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -49,6 +49,7 @@
 	{(unsigned long)__GFP_RECLAIM,		"__GFP_RECLAIM"},	\
 	{(unsigned long)__GFP_DIRECT_RECLAIM,	"__GFP_DIRECT_RECLAIM"},\
 	{(unsigned long)__GFP_KSWAPD_RECLAIM,	"__GFP_KSWAPD_RECLAIM"},\
+	{(unsigned long)__GFP_NO_INIT_ON_ALLOC,	"__GFP_NO_INIT_ON_ALLOC"},\
 	{(unsigned long)__GFP_ZEROTAGS,		"__GFP_ZEROTAGS"},	\
 	{(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\
 
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index ddaa45e..6f736e8 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -39,6 +39,7 @@
 #define VIRTIO_BALLOON_F_REPORTING	5 /* Page reporting virtqueue */
 
 /* Size of a PFN in the balloon interface. */
+/* This is not useful with the change that allocates using hugepages */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
 
 #define VIRTIO_BALLOON_CMD_ID_STOP	0
@@ -59,6 +60,8 @@
 	};
 	/* Stores PAGE_POISON if page poisoning is in use */
 	__le32 poison_val;
+	/* allocation size for balloon page, order to use, e.g. 0=>4k, 9=>2MB. etc.*/
+	__le32 hugepage_order;
 };
 
 #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index bc740d6..78a6384 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -49,5 +49,6 @@
 #define VIRTIO_ID_FS           26 /* virtio filesystem */
 #define VIRTIO_ID_PMEM         27 /* virtio pmem */
 #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_PVCLOCK        61 /* virtio pvclock (experimental id) */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_pvclock.h b/include/uapi/linux/virtio_pvclock.h
new file mode 100644
index 0000000..808d47b
--- /dev/null
+++ b/include/uapi/linux/virtio_pvclock.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_VIRTIO_PVCLOCK_H
+#define _LINUX_VIRTIO_PVCLOCK_H
+
+#include <linux/types.h>
+#include <linux/virtio_types.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+
+/* The feature bitmap for virtio pvclock */
+/* TSC is stable */
+#define VIRTIO_PVCLOCK_F_TSC_STABLE 0
+/* Inject sleep for suspend */
+#define VIRTIO_PVCLOCK_F_INJECT_SLEEP 1
+/* Use device clocksource rating */
+#define VIRTIO_PVCLOCK_F_CLOCKSOURCE_RATING 2
+
+struct virtio_pvclock_config {
+	/* Number of ns the VM has been suspended without guest suspension. */
+	__u64 suspend_time_ns;
+	/* Device-suggested rating of the pvclock clocksource. */
+	__u32 clocksource_rating;
+	__u32 padding; /* explicit padding; the fields add up to 16 bytes */
+};
+
+/* Status values for a virtio_pvclock request. */
+#define VIRTIO_PVCLOCK_S_OK 0
+#define VIRTIO_PVCLOCK_S_IOERR 1
+#define VIRTIO_PVCLOCK_S_UNSUPP 2
+
+/*
+ * Virtio pvclock set pvclock page request. Sets up the shared memory
+ * pvclock_vsyscall_time_info struct.
+ */
+struct virtio_pvclock_set_pvclock_page_req {
+	/* Physical address of pvclock_vsyscall_time_info. */
+	__u64 pvclock_page_pa;
+	/* Current system time. */
+	__u64 system_time;
+	/* Current tsc value. */
+	__u64 tsc_timestamp;
+	/* Status of this request, one of VIRTIO_PVCLOCK_S_*. */
+	__u8 status;
+	__u8 padding[7]; /* explicit padding; the fields add up to 32 bytes */
+};
+
+#endif /* _LINUX_VIRTIO_PVCLOCK_H */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d9b48f7..9e49c59 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1744,6 +1744,7 @@
 	/* signal hrtimers about time change */
 	clock_was_set();
 }
+EXPORT_SYMBOL_GPL(timekeeping_inject_sleeptime64);
 #endif
 
 /**
diff --git a/kiwi.fragment b/kiwi.fragment
new file mode 100644
index 0000000..6e411cb
--- /dev/null
+++ b/kiwi.fragment
@@ -0,0 +1,2 @@
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_PVCLOCK=m
\ No newline at end of file
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index 26de020..31019bb 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -6,6 +6,9 @@
  *
  * Copyright (C) 2012, Red Hat, Inc.  Rafael Aquini <aquini@redhat.com>
  */
+#include "linux/gfp.h"
+#include "linux/jump_label.h"
+#include "linux/types.h"
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/export.h>
@@ -119,13 +122,19 @@
  * Driver must call balloon_page_enqueue before definitively removing the page
  * from the guest system.
  *
+ * @order: order to use when allocating a page. 0 => 4k, otherwise __GFP_COMP
+ *         is used to allocate a hugepage.
  * Return: struct page for the allocated page or NULL on allocation failure.
  */
-struct page *balloon_page_alloc(void)
+struct page *balloon_page_alloc(int order)
 {
-	struct page *page = alloc_page(balloon_mapping_gfp_mask() |
-				       __GFP_NOMEMALLOC | __GFP_NORETRY |
-				       __GFP_NOWARN);
+	struct page *page;
+	gfp_t gfp_mask = balloon_mapping_gfp_mask() | __GFP_NOMEMALLOC |
+			 __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_INIT_ON_ALLOC |
+			 (order ? __GFP_COMP : 0);
+
+	/* Any non-zero order is requested as a compound page (__GFP_COMP). */
+	page = alloc_pages(gfp_mask, order);
 	return page;
 }
 EXPORT_SYMBOL_GPL(balloon_page_alloc);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index a50dae2..daf6935 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -660,6 +660,7 @@
 	{ "__GFP_RECLAIM",		"R" },
 	{ "__GFP_DIRECT_RECLAIM",	"DR" },
 	{ "__GFP_KSWAPD_RECLAIM",	"KR" },
+	{ "__GFP_NO_INIT_ON_ALLOC",	"NIA" },
 };
 
 static size_t max_gfp_len;