ANDROID: 16K: Fix show maps CFI failure am: 626e5dce00
Original change: https://android-review.googlesource.com/c/kernel/common/+/3070751
Change-Id: I2a3d0390adb8bc90ba59a9f8f2f3d2ad449efc4c
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/OWNERS b/OWNERS
index 414594e..2efc343 100644
--- a/OWNERS
+++ b/OWNERS
@@ -1,12 +1,6 @@
set noparent
-
-# GKI Dr. No Enforcement is active on this branch. Approval of one of the Dr.
-# No reviewers is required following a regular CodeReview+2 vote of a code
-# reviewer.
-#
-# See the GKI release documentation (go/gki-dr-no) for further details.
-#
-# The expanded list of reviewers can be found at:
-# https://android.googlesource.com/kernel/common/+/android-mainline/OWNERS_DrNo
-
-include kernel/common:android-mainline:/OWNERS_DrNo
+adelva@google.com
+vnagarnaik@google.com
+udam@google.com
+tomcherry@google.com
+srichman@google.com
diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig
index 986af3f..84436bee 100644
--- a/arch/x86/configs/gki_defconfig
+++ b/arch/x86/configs/gki_defconfig
@@ -255,9 +255,11 @@
CONFIG_NFC=y
CONFIG_PCI=y
CONFIG_PCIEPORTBUS=y
+CONFIG_HOTPLUG_PCI_PCIE=y
CONFIG_PCIEAER=y
CONFIG_PCI_MSI=y
CONFIG_PCI_IOV=y
+CONFIG_HOTPLUG_PCI=y
CONFIG_PCIE_DW_PLAT_EP=y
CONFIG_PCI_ENDPOINT=y
CONFIG_FW_LOADER_USER_HELPER=y
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 2b5e04c..2e544ec 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -39,6 +39,7 @@
#undef EMIT_VVAR
unsigned int vclocks_used __read_mostly;
+EXPORT_SYMBOL_GPL(vclocks_used);
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 4e1757b..8190135 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -4,7 +4,7 @@
#include <uapi/asm/setup.h>
-#define COMMAND_LINE_SIZE 2048
+#define COMMAND_LINE_SIZE 4096
#include <linux/linkage.h>
#include <asm/page_types.h>
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 11065dc..91a12b3 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -109,6 +109,7 @@
return ret;
}
+EXPORT_SYMBOL_GPL(pvclock_clocksource_read);
void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
struct pvclock_vcpu_time_info *vcpu_time,
@@ -148,6 +149,7 @@
WARN_ON(vclock_was_used(VDSO_CLOCKMODE_PVCLOCK));
pvti_cpu0_va = pvti;
}
+EXPORT_SYMBOL_GPL(pvclock_set_pvti_cpu0_va);
struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void)
{
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98838b7..5bdccf5 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -70,6 +70,8 @@
#include <asm/proto.h>
#endif
+extern bool kiwi_fault_logging;
+
DECLARE_BITMAP(system_vectors, NR_VECTORS);
static inline void cond_local_irq_enable(struct pt_regs *regs)
@@ -169,6 +171,11 @@
static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
unsigned long trapnr, int signr, int sicode, void __user *addr)
{
+	if (unlikely(kiwi_fault_logging)) {
+		/*
+		 * Optional debug trace of every error trap. addr is a pointer and
+		 * signr/sicode are ints, so they must not be consumed as %lx
+		 * varargs: cast the pointer and print the ints with %x.
+		 */
+		printk(KERN_ALERT "%s[%d]: do_error_trap for %s at %lx trapnr %lx signr %x sicode %x ip %px sp %px error %lx",
+			current->comm, task_pid_nr(current), str, (unsigned long)addr, trapnr, signr,
+			sicode, (void *)regs->ip, (void *)regs->sp, error_code);
+	}
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
@@ -564,6 +571,12 @@
cond_local_irq_enable(regs);
+ if (unlikely(kiwi_fault_logging)) {
+ printk(KERN_ALERT "%s[%d]: exc_general_protection ip %px sp %px error %lx",
+ current->comm, task_pid_nr(current),
+ (void *)regs->ip, (void *)regs->sp, error_code);
+ }
+
if (static_cpu_has(X86_FEATURE_UMIP)) {
if (user_mode(regs) && fixup_umip_exception(regs))
goto exit;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e9afbf8..b78429e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1446,6 +1446,26 @@
trace_page_fault_kernel(address, regs, error_code);
}
+bool kiwi_fault_logging = false;
+
+/*
+ * kiwi_fault_logging=on|off
+ *   Enables detailed fault logs.
+ *
+ *	on	Enable
+ *	off	Disable (default)
+ *
+ * Any other value leaves the current setting unchanged.
+ */
+static int __init parse_kiwi_fault_logging(char *str)
+{
+	if (!strcmp(str, "on"))
+		kiwi_fault_logging = true;
+	else if (!strcmp(str, "off"))
+		kiwi_fault_logging = false;
+
+	/*
+	 * A __setup() handler returns 1 to mark the option as consumed.
+	 * Returning 0 makes the kernel treat "kiwi_fault_logging=..." as an
+	 * unknown parameter and pass it on to init.
+	 */
+	return 1;
+}
+__setup("kiwi_fault_logging=", parse_kiwi_fault_logging);
+
static __always_inline void
handle_page_fault(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
@@ -1476,6 +1496,12 @@
unsigned long address = read_cr2();
irqentry_state_t state;
+ if (unlikely(kiwi_fault_logging)) {
+ printk(KERN_ALERT "%s[%d]: exc_page_fault at %lx ip %px sp %px error %lx",
+ current->comm, task_pid_nr(current), address,
+ (void *)regs->ip, (void *)regs->sp, error_code);
+ }
+
prefetchw(¤t->mm->mmap_lock);
/*
diff --git a/build.config.kiwi.x86_64 b/build.config.kiwi.x86_64
new file mode 100644
index 0000000..89fb7a3
--- /dev/null
+++ b/build.config.kiwi.x86_64
@@ -0,0 +1,11 @@
+. ${ROOT_DIR}/common-modules/virtual-device/build.config.virtual_device
+
+. ${ROOT_DIR}/${KERNEL_DIR}/build.config.x86_64
+
+DEFCONFIG=kiwi_x86_64_gki_defconfig
+PRE_DEFCONFIG_CMDS="KCONFIG_CONFIG=${ROOT_DIR}/${KERNEL_DIR}/arch/x86/configs/${DEFCONFIG} ${ROOT_DIR}/${KERNEL_DIR}/scripts/kconfig/merge_config.sh -m -r ${ROOT_DIR}/${KERNEL_DIR}/arch/x86/configs/gki_defconfig ${ROOT_DIR}/common-modules/virtual-device/virtual_device.fragment ${ROOT_DIR}/${KERNEL_DIR}/kiwi.fragment"
+POST_DEFCONFIG_CMDS="rm ${ROOT_DIR}/${KERNEL_DIR}/arch/x86/configs/${DEFCONFIG}"
+BUILD_VIRTIO_DXGKRNL=m
+
+# Not saving any kernel images. This build step is meant purely to generate the .kos.
+FILES=""
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 3ef5544..113b36e 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -139,4 +139,14 @@
This option adds a flavor of dma buffers that are backed by
virtio resources.
+config VIRTIO_PVCLOCK
+ tristate "Virtio pvclock driver"
+ depends on VIRTIO
+ depends on X86 && HYPERVISOR_GUEST
+ select PARAVIRT_CLOCK
+ help
+ This driver supports virtio pvclock devices.
+
+ If unsure, say M.
+
endif # VIRTIO_MENU
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 591e6f72..e29be13 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -9,3 +9,4 @@
obj-$(CONFIG_VIRTIO_VDPA) += virtio_vdpa.o
obj-$(CONFIG_VIRTIO_MEM) += virtio_mem.o
obj-$(CONFIG_VIRTIO_DMA_SHARED_BUFFER) += virtio_dma_buf.o
+obj-$(CONFIG_VIRTIO_PVCLOCK) += virtio_pvclock.o
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 3271822..107d327 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -6,6 +6,7 @@
* Copyright 2008 Rusty Russell IBM Corporation
*/
+#include "linux/dev_printk.h"
#include <linux/virtio.h>
#include <linux/virtio_balloon.h>
#include <linux/swap.h>
@@ -26,8 +27,14 @@
* Balloon device works in 4K page units. So each page is pointed to by
* multiple balloon pages. All memory counters in this driver are in balloon
* page units.
+ *
+ * With hugepage allocation, we need to treat 1 page == 1 balloon page at least
+ * for x86 which is the current prototype target.
*/
+#define VIRTIO_BALLOON_PAGES_PER_PAGE 1
+/*
#define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
+*/
#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
/* Maximum number of (4k) pages to deflate on OOM notifications. */
#define VIRTIO_BALLOON_OOM_NR_PAGES 256
@@ -126,6 +133,9 @@
/* Free page reporting device */
struct virtqueue *reporting_vq;
struct page_reporting_dev_info pr_dev_info;
+
+ /* order to use for hugepage allocation, 0 => 4k, 1 => 8k, 2 => 16k, etc. */
+ unsigned int hugepage_order;
};
static const struct virtio_device_id id_table[] = {
@@ -204,8 +214,9 @@
* Note that the first pfn points at start of the page.
*/
for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
- pfns[i] = cpu_to_virtio32(vb->vdev,
- page_to_balloon_pfn(page) + i);
+ pfns[i] = cpu_to_virtio32(
+ vb->vdev,
+ (page_to_balloon_pfn(page) >> vb->hugepage_order) + i);
}
static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
@@ -220,7 +231,7 @@
for (num_pfns = 0; num_pfns < num;
num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
- struct page *page = balloon_page_alloc();
+ struct page *page = balloon_page_alloc(vb->hugepage_order);
if (!page) {
dev_info_ratelimited(&vb->vdev->dev,
@@ -245,7 +256,7 @@
vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
if (!virtio_has_feature(vb->vdev,
VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
- adjust_managed_page_count(page, -1);
+ adjust_managed_page_count(page, -(1 << vb->hugepage_order));
vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE;
}
@@ -266,7 +277,7 @@
list_for_each_entry_safe(page, next, pages, lru) {
if (!virtio_has_feature(vb->vdev,
VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
- adjust_managed_page_count(page, 1);
+ adjust_managed_page_count(page, (1 << vb->hugepage_order));
list_del(&page->lru);
put_page(page); /* balloon reference */
}
@@ -476,17 +487,14 @@
stats_handle_request(vb);
}
-static void update_balloon_size_func(struct work_struct *work)
+static s64 update_balloon_size_internal(struct virtio_balloon *vb)
{
- struct virtio_balloon *vb;
s64 diff;
- vb = container_of(work, struct virtio_balloon,
- update_balloon_size_work);
diff = towards_target(vb);
if (!diff)
- return;
+ return diff;
if (diff > 0)
diff -= fill_balloon(vb, diff);
@@ -494,7 +502,17 @@
diff += leak_balloon(vb, -diff);
update_balloon_size(vb);
- if (diff)
+ return diff;
+}
+
+static void update_balloon_size_func(struct work_struct *work)
+{
+ struct virtio_balloon *vb;
+
+ vb = container_of(work, struct virtio_balloon,
+ update_balloon_size_work);
+
+ if (update_balloon_size_internal(vb))
queue_work(system_freezable_wq, work);
}
@@ -905,6 +923,10 @@
mutex_init(&vb->balloon_lock);
init_waitqueue_head(&vb->acked);
vb->vdev = vdev;
+ /* Set the hugepage_order provided by the hypervisor */
+ virtio_cread_le(vb->vdev, struct virtio_balloon_config, hugepage_order,
+ &vb->hugepage_order);
+ dev_info_ratelimited(&vdev->dev, "allocation using order=%d", vb->hugepage_order);
balloon_devinfo_init(&vb->vb_dev_info);
@@ -1004,8 +1026,23 @@
virtio_device_ready(vdev);
- if (towards_target(vb))
- virtballoon_changed(vdev);
+ if (towards_target(vb)) {
+ s64 diff = -1;
+ do {
+ s64 new_diff = update_balloon_size_internal(vb);
+ /* The BUG below will be triggered if the balloon couldn't make any
+ * progress with the initial inflation.
+ * Explicit crash because there shouldn't be any failure at this stage.
+ * The reasons this BUG could trigger if:
+ * * the initial target is too aggressive
+ * * virtio-balloon is not being loaded at the 1st stage init
+ * Both are considered programming bugs and should be fixed.
+ */
+ BUG_ON(new_diff == diff);
+ diff = new_diff;
+ } while (diff != 0);
+ }
+ dev_info_ratelimited(&vdev->dev, "initial allocation done");
return 0;
out_unregister_oom:
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
index 427d58c..be83944 100644
--- a/drivers/virtio/virtio_input.c
+++ b/drivers/virtio/virtio_input.c
@@ -9,6 +9,8 @@
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_input.h>
+#define RUMBLE_EFFECT_TYPE 0xFFFF
+
struct virtio_input {
struct virtio_device *vdev;
struct input_dev *idev;
@@ -209,12 +211,70 @@
spin_unlock_irqrestore(&vi->lock, flags);
}
+/*
+ * Force-feedback upload hook: forwards an FF_RUMBLE effect to the device.
+ *
+ * The two 16-bit magnitudes are packed into one 32-bit value (strong in the
+ * upper half, weak in the lower half) and sent as a status event of type
+ * RUMBLE_EFFECT_TYPE, with the effect id as the event code.
+ *
+ * Returns the result of virtinput_send_status() for FF_RUMBLE. Any other
+ * effect type is only logged and 0 is returned. @old is not used.
+ */
+static int virtinput_ff_upload(
+	struct input_dev *dev, struct ff_effect *effect, struct ff_effect *old) {
+	struct virtio_input *vi;
+	s32 value;
+	int ret;
+
+	if (effect->type != FF_RUMBLE) {
+		/*
+		 * Adjacent string literals instead of a backslash continuation:
+		 * the continuation pulled the next line's indentation into the
+		 * logged message.
+		 */
+		dev_warn(&dev->dev,
+			 "virtinput_ff_upload did not receive effect type FF_RUMBLE. "
+			 "Instead received event type: %d",
+			 effect->type);
+		return 0;
+	}
+
+	vi = input_get_drvdata(dev);
+	pr_debug("virtinput_ff_upload FF_RUMBLE strong: %d, weak: %d",
+		effect->u.rumble.strong_magnitude, effect->u.rumble.weak_magnitude);
+
+	/*
+	 * Widen to u32 before shifting: a __u16 is promoted to (signed) int,
+	 * and shifting a magnitude >= 0x8000 left by 16 would overflow it.
+	 */
+	value = (s32)(((u32)effect->u.rumble.strong_magnitude << 16) |
+		      effect->u.rumble.weak_magnitude);
+	ret = virtinput_send_status(vi, RUMBLE_EFFECT_TYPE, effect->id, value);
+	if (ret != 0) {
+		dev_err(&dev->dev, "virtinput_ff_upload virtinput_send_status error: %d", ret);
+	}
+	return ret;
+}
+
+/*
+ * Force-feedback erase hook: only logs the request; nothing is sent to the
+ * device. Always returns 0.
+ */
+static int virtinput_ff_erase(struct input_dev *dev, int effect_id) {
+	dev_info(&dev->dev, "virtinput_ff_erase called with effect_id: %d", effect_id);
+	return 0;
+}
+
+/*
+ * Force-feedback playback hook: forwards @value for @effect_id to the device
+ * as an EV_FF status event (code = effect id).
+ *
+ * Returns the result of virtinput_send_status().
+ */
+static int virtinput_ff_playback(struct input_dev *dev, int effect_id, int value) {
+	struct virtio_input *vi;
+	int ret;
+	pr_debug("virtinput_ff_playback called with effect_id: %d, value: %d", effect_id, value);
+	vi = input_get_drvdata(dev);
+
+	ret = virtinput_send_status(vi, EV_FF, effect_id, value);
+	if (ret != 0) {
+		/* A failed send is an error, reported at the same level as in virtinput_ff_upload(). */
+		dev_err(&dev->dev, "virtinput_ff_playback virtinput_send_status error: %d", ret);
+	}
+	return ret;
+}
+
+/* Force-feedback gain hook: only logs the requested gain; nothing is sent to the device. */
+static void virtinput_ff_set_gain(struct input_dev *dev, u16 gain) {
+	dev_info(&dev->dev, "virtinput_ff_set_gain called with gain: %d", gain);
+}
+
+/* Force-feedback autocenter hook: only logs the requested magnitude; nothing is sent to the device. */
+static void virtinput_ff_set_autocenter(struct input_dev *dev, u16 magnitude) {
+	dev_info(&dev->dev, "virtinput_ff_set_autocenter called with magnitude: %d", magnitude);
+}
+
+/* Force-feedback destroy hook: only logs the call; @ff is not touched. */
+static void virtinput_ff_destroy(struct ff_device *ff) {
+	printk(KERN_INFO "virtinput_ff_destroy called");
+}
+
static int virtinput_probe(struct virtio_device *vdev)
{
struct virtio_input *vi;
unsigned long flags;
size_t size;
int abs, err;
+ int error;
+ struct ff_device *ff;
if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
return -ENODEV;
@@ -292,6 +352,8 @@
vi->idev->ledbit, LED_CNT);
virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_SND,
vi->idev->sndbit, SND_CNT);
+ virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_FF,
+ vi->idev->ffbit, FF_CNT);
if (test_bit(EV_ABS, vi->idev->evbit)) {
for (abs = 0; abs < ABS_CNT; abs++) {
@@ -301,6 +363,25 @@
}
}
+ if (test_bit(FF_RUMBLE, vi->idev->ffbit)) {
+ dev_info(&vi->idev->dev, "Creating FF device for %s", vi->idev->name);
+ error = input_ff_create(vi->idev, FF_MAX_EFFECTS);
+
+ if (!error) {
+ ff = vi->idev->ff;
+
+ ff->upload = virtinput_ff_upload;
+ ff->erase = virtinput_ff_erase;
+ ff->playback = virtinput_ff_playback;
+ ff->set_gain = virtinput_ff_set_gain;
+ ff->set_autocenter = virtinput_ff_set_autocenter;
+ ff->destroy = virtinput_ff_destroy;
+ } else {
+ dev_err(&vi->idev->dev, "input_ff_create error: %d. Won't create ff device.", error);
+ }
+ }
+
+
virtio_device_ready(vdev);
vi->ready = true;
err = input_register_device(vi->idev);
diff --git a/drivers/virtio/virtio_pvclock.c b/drivers/virtio/virtio_pvclock.c
new file mode 100644
index 0000000..7d6fd0b
--- /dev/null
+++ b/drivers/virtio/virtio_pvclock.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Virtio pvclock implementation.
+ *
+ * Copyright (C) 2021 Google, Inc.
+ */
+
+#include <linux/clocksource.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/virtio.h>
+#include <linux/virtio_pvclock.h>
+#include <linux/workqueue.h>
+#include <asm/pvclock.h>
+
+/*
+ * Indices of the driver's virtqueues. VIRTIO_PVCLOCK_VQ_MAX is the number of
+ * queues and sizes the arrays passed to find_vqs.
+ */
+enum virtio_pvclock_vq {
+	VIRTIO_PVCLOCK_VQ_SET_PVCLOCK_PAGE,
+	VIRTIO_PVCLOCK_VQ_MAX
+};
+
+/*
+ * Per-device driver state. Allocated in virtpvclock_probe(), stored in
+ * vdev->priv and freed in virtpvclock_remove().
+ */
+struct virtio_pvclock {
+	/* Owning virtio device. */
+	struct virtio_device *vdev;
+	/* Queue used to hand the pvclock page to the device. */
+	struct virtqueue *set_pvclock_page_vq;
+	/* Request buffer queued on set_pvclock_page_vq; its status is checked on completion. */
+	struct virtio_pvclock_set_pvclock_page_req set_page_request;
+
+	/* Updating the suspend time happens via scheduled work. */
+	struct work_struct update_suspend_time_work;
+	/* Creating the clocksource happens via scheduled work. */
+	struct work_struct create_clocksource_work;
+
+	/* Synchronize access/update to injected_suspend_ns. */
+	struct mutex inject_suspend_lock;
+	/* Total ns injected as sleep time. */
+	u64 injected_suspend_ns;
+
+	/* DMA address of virtio_pvclock_page. */
+	dma_addr_t pvclock_page_dma_addr;
+};
+
+/* CPU accessible pointer to pvclock page. */
+static struct pvclock_vsyscall_time_info *virtio_pvclock_page;
+
+/* Devices this driver binds to; never modified, hence const. */
+static const struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_PVCLOCK, VIRTIO_DEV_ANY_ID },
+	{ 0 },
+};
+
+/*
+ * Work handler (queued from virtpvclock_changed()): read the device's
+ * suspend_time_ns config field and inject the part not yet accounted for
+ * into timekeeping as sleep time.
+ *
+ * injected_suspend_ns remembers the highest value seen so far, so only the
+ * increase since the previous run is passed to
+ * timekeeping_inject_sleeptime64().
+ *
+ * Only referenced from this file, hence static.
+ */
+static void update_suspend_time(struct work_struct *work)
+{
+	u64 suspend_ns, suspend_time_delta = 0;
+	struct timespec64 inject_time;
+	struct virtio_pvclock *vp;
+
+	vp = container_of(work, struct virtio_pvclock,
+			  update_suspend_time_work);
+
+	virtio_cread(vp->vdev, struct virtio_pvclock_config, suspend_time_ns,
+		     &suspend_ns);
+
+	/* Compute the delta and advance the high-water mark under the lock. */
+	mutex_lock(&vp->inject_suspend_lock);
+	if (suspend_ns > vp->injected_suspend_ns) {
+		suspend_time_delta = suspend_ns - vp->injected_suspend_ns;
+		vp->injected_suspend_ns = suspend_ns;
+	}
+	mutex_unlock(&vp->inject_suspend_lock);
+
+	if (suspend_time_delta == 0) {
+		/* Reached when the value is unchanged as well as when it went backwards. */
+		dev_err(&vp->vdev->dev,
+			"%s: suspend_time_ns is not greater than injected_suspend_ns\n",
+			__func__);
+		return;
+	}
+
+	inject_time = ns_to_timespec64(suspend_time_delta);
+
+	timekeeping_inject_sleeptime64(&inject_time);
+
+	dev_info(&vp->vdev->dev, "injected sleeptime: %llu ns\n",
+		 suspend_time_delta);
+}
+
+/*
+ * Clocksource read hook: sample the shared pvclock page with preemption
+ * disabled for the duration of the read. @cs is not used.
+ */
+static u64 virtio_pvclock_clocksource_read(struct clocksource *cs)
+{
+	u64 cycles;
+
+	preempt_disable_notrace();
+	cycles = pvclock_clocksource_read(&virtio_pvclock_page->pvti);
+	preempt_enable_notrace();
+
+	return cycles;
+}
+
+/*
+ * Clocksource enable hook: when this clocksource is exposed to the vDSO in
+ * pvclock mode, record that mode as used. Always returns 0.
+ */
+static int virtio_pvclock_cs_enable(struct clocksource *cs)
+{
+	const bool vdso_pvclock =
+		cs->vdso_clock_mode == VDSO_CLOCKMODE_PVCLOCK;
+
+	if (vdso_pvclock)
+		vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);
+
+	return 0;
+}
+
+/*
+ * The clocksource backed by the shared pvclock page. A single static
+ * instance: .rating may be overridden in virtpvclock_validate() and
+ * .vdso_clock_mode is filled in by create_clocksource() when the device
+ * offers VIRTIO_PVCLOCK_F_TSC_STABLE.
+ */
+static struct clocksource virtio_pvclock_clocksource = {
+	.name = "virtio-pvclock",
+	.rating = 200, /* default rating, updated by virtpvclock_validate */
+	.read = virtio_pvclock_clocksource_read,
+	.mask = CLOCKSOURCE_MASK(64),
+	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+	.enable = virtio_pvclock_cs_enable,
+};
+
+/*
+ * Virtqueue callback for the set_pvclock_page queue: runs when the device has
+ * completed the request queued by set_pvclock_page(). On success the
+ * clocksource registration is deferred to a work item.
+ */
+static void set_pvclock_page_callback(struct virtqueue *vq)
+{
+	struct virtio_pvclock *vp = vq->vdev->priv;
+	unsigned int len;
+
+	/*
+	 * Reclaim the completed request. Without this the used buffer stays
+	 * on the ring, the queue keeps looking like it has pending work, and
+	 * a later invocation of this callback would queue
+	 * create_clocksource_work (and thus register the clocksource) again.
+	 * If nothing has completed there is no status to look at yet.
+	 */
+	if (!virtqueue_get_buf(vq, &len))
+		return;
+
+	if (vp->set_page_request.status != VIRTIO_PVCLOCK_S_OK) {
+		dev_err(&vq->vdev->dev,
+			"%s: set_pvclock_page req status is %u\n", __func__,
+			vp->set_page_request.status);
+		return;
+	}
+
+	/*
+	 * Create the actual clocksource via a work queue because we're in an
+	 * interrupt handler right now.
+	 */
+	schedule_work(&vp->create_clocksource_work);
+}
+
+/*
+ * Work handler (queued from set_pvclock_page_callback()): register the
+ * virtio-pvclock clocksource, optionally exposing it to the vDSO.
+ */
+static void create_clocksource(struct work_struct *work)
+{
+	struct virtio_pvclock *vp;
+	int err;
+
+	vp = container_of(work, struct virtio_pvclock, create_clocksource_work);
+
+	/*
+	 * VDSO pvclock can only be used if the TSCs are stable. The device also
+	 * must set PVCLOCK_TSC_STABLE_BIT in the pvclock flags field.
+	 */
+	if (virtio_has_feature(vp->vdev, VIRTIO_PVCLOCK_F_TSC_STABLE)) {
+		pvclock_set_pvti_cpu0_va(virtio_pvclock_page);
+		virtio_pvclock_clocksource.vdso_clock_mode =
+			VDSO_CLOCKMODE_PVCLOCK;
+	}
+
+	/* Only report success if the registration actually succeeded. */
+	err = clocksource_register_hz(&virtio_pvclock_clocksource, NSEC_PER_SEC);
+	if (err) {
+		dev_err(&vp->vdev->dev,
+			"%s: failed to register clocksource: %d\n", __func__,
+			err);
+		return;
+	}
+
+	dev_info(&vp->vdev->dev, "registered clocksource\n");
+}
+
+/*
+ * Config-change callback: defer reading the new suspend time to
+ * update_suspend_time_work instead of doing it here.
+ */
+static void virtpvclock_changed(struct virtio_device *vdev)
+{
+	struct virtio_pvclock *vp = vdev->priv;
+
+	schedule_work(&vp->update_suspend_time_work);
+}
+
+/*
+ * Fill in vp->set_page_request (page DMA address, current ktime and TSC) and
+ * queue it on the set_pvclock_page virtqueue, then kick the device.
+ *
+ * Returns 0 on success or the error from virtqueue_add_outbuf().
+ *
+ * NOTE(review): the buffer is queued as driver-to-device only (outbuf), yet
+ * set_pvclock_page_callback() reads set_page_request.status after completion
+ * — confirm how the device is expected to report status.
+ */
+static int set_pvclock_page(struct virtio_pvclock *vp)
+{
+	struct scatterlist sg;
+	int err;
+
+	vp->set_page_request.pvclock_page_pa = vp->pvclock_page_dma_addr;
+	vp->set_page_request.system_time = ktime_get();
+	vp->set_page_request.tsc_timestamp = rdtsc_ordered();
+
+	/* vp itself is the token associated with the queued buffer. */
+	sg_init_one(&sg, &vp->set_page_request, sizeof(vp->set_page_request));
+	err = virtqueue_add_outbuf(vp->set_pvclock_page_vq, &sg, 1, vp,
+				   GFP_KERNEL);
+
+	if (err) {
+		dev_err(&vp->vdev->dev, "%s: failed to add output\n", __func__);
+		return err;
+	}
+	virtqueue_kick(vp->set_pvclock_page_vq);
+
+	return 0;
+}
+
+/*
+ * Create the driver's virtqueue and immediately send the set-pvclock-page
+ * request on it.
+ *
+ * Returns 0 on success or a negative error. On failure no virtqueues are
+ * left behind, so callers need no extra queue cleanup.
+ */
+static int init_vqs(struct virtio_pvclock *vp)
+{
+	vq_callback_t *callbacks[VIRTIO_PVCLOCK_VQ_MAX];
+	struct virtqueue *vqs[VIRTIO_PVCLOCK_VQ_MAX];
+	const char *names[VIRTIO_PVCLOCK_VQ_MAX];
+	int err;
+
+	callbacks[VIRTIO_PVCLOCK_VQ_SET_PVCLOCK_PAGE] =
+		set_pvclock_page_callback;
+	names[VIRTIO_PVCLOCK_VQ_SET_PVCLOCK_PAGE] = "set_pvclock_page";
+
+	err = vp->vdev->config->find_vqs(vp->vdev, VIRTIO_PVCLOCK_VQ_MAX, vqs,
+					 callbacks, names, NULL, NULL);
+	if (err)
+		return err;
+
+	vp->set_pvclock_page_vq = vqs[VIRTIO_PVCLOCK_VQ_SET_PVCLOCK_PAGE];
+
+	err = set_pvclock_page(vp);
+	if (err) {
+		/*
+		 * Neither probe nor restore deletes the queues when this
+		 * function fails, so release them here instead of leaking.
+		 */
+		vp->vdev->config->del_vqs(vp->vdev);
+		vp->set_pvclock_page_vq = NULL;
+	}
+
+	return err;
+}
+
+/*
+ * Probe: allocate the per-device state and the DMA-coherent pvclock page,
+ * set up the work items and virtqueue, send the page to the device and mark
+ * the device ready.
+ *
+ * Returns 0 on success, -EINVAL if config space access is unavailable,
+ * -ENOMEM on allocation failure, or the error from init_vqs().
+ *
+ * NOTE(review): virtio_pvclock_page is a single file-scope pointer, so a
+ * second device probing would overwrite it — presumably only one pvclock
+ * device is expected; confirm.
+ * NOTE(review): init_vqs() queues and kicks the request before
+ * virtio_device_ready() is called below — confirm this ordering is intended.
+ */
+static int virtpvclock_probe(struct virtio_device *vdev)
+{
+	struct virtio_pvclock *vp;
+	int err;
+
+	/* update_suspend_time() needs to read the device config space. */
+	if (!vdev->config->get) {
+		dev_err(&vdev->dev, "%s: config access disabled\n", __func__);
+		return -EINVAL;
+	}
+
+	vp = kzalloc(sizeof(*vp), GFP_KERNEL);
+	if (!vp) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* Page shared with the device; its DMA address is sent in set_pvclock_page(). */
+	virtio_pvclock_page =
+		dma_alloc_coherent(vdev->dev.parent,
+				   sizeof(*virtio_pvclock_page),
+				   &vp->pvclock_page_dma_addr, GFP_KERNEL);
+
+	if (!virtio_pvclock_page) {
+		err = -ENOMEM;
+		goto out_free_vp;
+	}
+
+	INIT_WORK(&vp->update_suspend_time_work, update_suspend_time);
+	INIT_WORK(&vp->create_clocksource_work, create_clocksource);
+	mutex_init(&vp->inject_suspend_lock);
+
+	/* Must be set before init_vqs(): the vq callback reads vdev->priv. */
+	vp->vdev = vdev;
+	vdev->priv = vp;
+
+	err = init_vqs(vp);
+	if (err)
+		goto out_free_pvclock_page;
+
+	virtio_device_ready(vdev);
+
+	return 0;
+
+	/* Error unwinding, in reverse order of acquisition. */
+out_free_pvclock_page:
+	dma_free_coherent(vdev->dev.parent, sizeof(*virtio_pvclock_page),
+			  virtio_pvclock_page, vp->pvclock_page_dma_addr);
+
+out_free_vp:
+	kfree(vp);
+out:
+	return err;
+}
+
+/* Shared teardown for remove and freeze: reset the device, then delete its queues. */
+static void remove_common(struct virtio_pvclock *vp)
+{
+	struct virtio_device *vdev = vp->vdev;
+
+	/* The reset comes first so that the queues can be cleaned up afterwards. */
+	vdev->config->reset(vdev);
+	vdev->config->del_vqs(vdev);
+}
+
+/*
+ * Remove: tear down the queues, make sure no work item still references the
+ * per-device state, then free the pvclock page and the state itself.
+ *
+ * NOTE(review): a clocksource registered by create_clocksource() is not
+ * unregistered here, while its read hook dereferences virtio_pvclock_page,
+ * which is freed below — confirm how unbinding/unloading is meant to work.
+ */
+static void virtpvclock_remove(struct virtio_device *vdev)
+{
+	struct virtio_pvclock *vp = vdev->priv;
+
+	remove_common(vp);
+
+	/*
+	 * Both work items embed a pointer back to vp via container_of(); wait
+	 * for them (or cancel them if still pending) before vp is freed.
+	 * This is done after the queues are gone so the vq callback cannot
+	 * queue new work in the meantime.
+	 */
+	cancel_work_sync(&vp->update_suspend_time_work);
+	cancel_work_sync(&vp->create_clocksource_work);
+
+	dma_free_coherent(vdev->dev.parent, sizeof(*virtio_pvclock_page),
+			  virtio_pvclock_page, vp->pvclock_page_dma_addr);
+
+	kfree(vp);
+}
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * PM freeze hook: reset the device and delete its queues. The per-device
+ * state and the pvclock page are kept for virtpvclock_restore().
+ * Always returns 0.
+ */
+static int virtpvclock_freeze(struct virtio_device *vdev)
+{
+	struct virtio_pvclock *vp = vdev->priv;
+
+	/*
+	 * NOTE(review): the original comment here stated that the workqueue
+	 * is already frozen by the PM core at this point. This driver queues
+	 * its work with schedule_work(); confirm that queue is really frozen
+	 * here (the balloon driver, by contrast, uses system_freezable_wq).
+	 */
+	remove_common(vp);
+	return 0;
+}
+
+/*
+ * PM restore hook: recreate the virtqueue, re-send the pvclock page request
+ * and mark the device ready again.
+ *
+ * Returns 0 on success or the error from init_vqs().
+ *
+ * NOTE(review): completion of the re-sent request schedules
+ * create_clocksource_work again, which calls clocksource_register_hz() on the
+ * same static clocksource a second time — confirm this is safe.
+ */
+static int virtpvclock_restore(struct virtio_device *vdev)
+{
+	int ret;
+
+	ret = init_vqs(vdev->priv);
+	if (ret)
+		return ret;
+
+	virtio_device_ready(vdev);
+
+	return 0;
+}
+#endif
+
+#define MAX_CLOCKSOURCE_RATING 450
+
+/*
+ * Validate hook: if the device offers VIRTIO_PVCLOCK_F_CLOCKSOURCE_RATING,
+ * adopt its suggested clocksource rating, capped at MAX_CLOCKSOURCE_RATING.
+ * When the suggestion exceeds the cap the feature bit is cleared as well.
+ * Without the feature the default rating is left alone. Always returns 0.
+ */
+static int virtpvclock_validate(struct virtio_device *vdev)
+{
+	uint32_t suggested;
+
+	if (virtio_has_feature(vdev, VIRTIO_PVCLOCK_F_CLOCKSOURCE_RATING)) {
+		suggested = virtio_cread32(
+			vdev, offsetof(struct virtio_pvclock_config,
+				       clocksource_rating));
+
+		if (suggested <= MAX_CLOCKSOURCE_RATING) {
+			dev_info(&vdev->dev, "clocksource rating set to %u\n",
+				 suggested);
+			virtio_pvclock_clocksource.rating = (int)suggested;
+		} else {
+			dev_warn(
+				&vdev->dev,
+				"device clocksource rating too high: %u, using max rating: %u\n",
+				suggested, MAX_CLOCKSOURCE_RATING);
+			__virtio_clear_bit(vdev,
+					   VIRTIO_PVCLOCK_F_CLOCKSOURCE_RATING);
+			virtio_pvclock_clocksource.rating =
+				(int)MAX_CLOCKSOURCE_RATING;
+		}
+	}
+
+	return 0;
+}
+
+/* Feature bits this driver understands; never modified, hence const. */
+static const unsigned int features[] = { VIRTIO_PVCLOCK_F_TSC_STABLE,
+					 VIRTIO_PVCLOCK_F_INJECT_SLEEP,
+					 VIRTIO_PVCLOCK_F_CLOCKSOURCE_RATING };
+
+/*
+ * Driver registration record: ties the feature and id tables to the
+ * validate/probe/remove/config-change hooks, plus the freeze/restore hooks
+ * when CONFIG_PM_SLEEP is enabled.
+ */
+static struct virtio_driver virtio_pvclock_driver = {
+	.feature_table = features,
+	.feature_table_size = ARRAY_SIZE(features),
+	.driver.name = KBUILD_MODNAME,
+	.driver.owner = THIS_MODULE,
+	.id_table = id_table,
+	.validate = virtpvclock_validate,
+	.probe = virtpvclock_probe,
+	.remove = virtpvclock_remove,
+	.config_changed = virtpvclock_changed,
+#ifdef CONFIG_PM_SLEEP
+	.freeze = virtpvclock_freeze,
+	.restore = virtpvclock_restore,
+#endif
+};
+
+module_virtio_driver(virtio_pvclock_driver);
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio pvclock driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index 338aa27..b09d46c 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -60,7 +60,7 @@
struct inode *inode;
};
-extern struct page *balloon_page_alloc(void);
+extern struct page *balloon_page_alloc(int order);
extern void balloon_page_enqueue(struct balloon_dev_info *b_dev_info,
struct page *page);
extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index fbed5dd..c03c055 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -41,16 +41,17 @@
#define ___GFP_ACCOUNT 0x400000u
#define ___GFP_ZEROTAGS 0x800000u
#define ___GFP_SKIP_KASAN_POISON 0x1000000u
+#define ___GFP_NO_INIT_ON_ALLOC 0x2000000u
#ifdef CONFIG_CMA
-#define ___GFP_CMA 0x2000000u
+#define ___GFP_CMA 0x4000000u
#else
#define ___GFP_CMA 0
#endif
#ifdef CONFIG_LOCKDEP
#ifdef CONFIG_CMA
-#define ___GFP_NOLOCKDEP 0x4000000u
+#define ___GFP_NOLOCKDEP 0x8000000u
#else
-#define ___GFP_NOLOCKDEP 0x2000000u
+#define ___GFP_NOLOCKDEP 0x4000000u
#endif
#else
#define ___GFP_NOLOCKDEP 0
@@ -235,12 +236,17 @@
* %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned
* on deallocation. Typically used for userspace pages. Currently only has an
* effect in HW tags mode.
+ *
+ * %__GFP_NO_INIT_ON_ALLOC skips zeroing a page if `init_on_alloc` is set.
+ * Typically used to reduce performance hit for really large allocations that
+ * will not be accessed. e.g. for virtio-balloon allocations when inflating.
*/
#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
#define __GFP_COMP ((__force gfp_t)___GFP_COMP)
#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS)
#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON)
+#define __GFP_NO_INIT_ON_ALLOC ((__force gfp_t)___GFP_NO_INIT_ON_ALLOC)
/* Disable lockdep for GFP context tracking */
#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dfefcfa..d376fde 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3095,7 +3095,7 @@
static inline bool want_init_on_alloc(gfp_t flags)
{
if (static_branch_unlikely(&init_on_alloc))
- return true;
+ return !(flags & __GFP_NO_INIT_ON_ALLOC);
return flags & __GFP_ZERO;
}
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index a26dbef..9631bfb 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -49,6 +49,7 @@
{(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \
{(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\
{(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\
+ {(unsigned long)__GFP_NO_INIT_ON_ALLOC, "__GFP_NO_INIT_ON_ALLOC"},\
{(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"}, \
{(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index ddaa45e..6f736e8 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -39,6 +39,7 @@
#define VIRTIO_BALLOON_F_REPORTING 5 /* Page reporting virtqueue */
/* Size of a PFN in the balloon interface. */
+/* This is not useful with the change that allocates using hugepages */
#define VIRTIO_BALLOON_PFN_SHIFT 12
#define VIRTIO_BALLOON_CMD_ID_STOP 0
@@ -59,6 +60,8 @@
};
/* Stores PAGE_POISON if page poisoning is in use */
__le32 poison_val;
+ /* allocation size for balloon page, order to use, e.g. 0=>4k, 9=>2MB. etc.*/
+ __le32 hugepage_order;
};
#define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index bc740d6..78a6384 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -49,5 +49,6 @@
#define VIRTIO_ID_FS 26 /* virtio filesystem */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_PVCLOCK 61 /* virtio pvclock (experimental id) */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_pvclock.h b/include/uapi/linux/virtio_pvclock.h
new file mode 100644
index 0000000..808d47b
--- /dev/null
+++ b/include/uapi/linux/virtio_pvclock.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_VIRTIO_PVCLOCK_H
+#define _LINUX_VIRTIO_PVCLOCK_H
+
+#include <linux/types.h>
+#include <linux/virtio_types.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+
+/* The feature bitmap for virtio pvclock */
+/* TSC is stable */
+#define VIRTIO_PVCLOCK_F_TSC_STABLE 0
+/* Inject sleep for suspend */
+#define VIRTIO_PVCLOCK_F_INJECT_SLEEP 1
+/* Use device clocksource rating */
+#define VIRTIO_PVCLOCK_F_CLOCKSOURCE_RATING 2
+
+/*
+ * Device configuration space layout.
+ *
+ * NOTE(review): the fields are plain __u64/__u32, whereas e.g.
+ * virtio_balloon_config declares its fields as __le32 — confirm the intended
+ * byte order of this config space.
+ */
+struct virtio_pvclock_config {
+	/* Number of ns the VM has been suspended without guest suspension. */
+	__u64 suspend_time_ns;
+	/* Device-suggested rating of the pvclock clocksource. */
+	__u32 clocksource_rating;
+	/* Explicit padding after clocksource_rating. */
+	__u32 padding;
+};
+
+/* Status values for a virtio_pvclock request. */
+#define VIRTIO_PVCLOCK_S_OK 0
+#define VIRTIO_PVCLOCK_S_IOERR 1
+#define VIRTIO_PVCLOCK_S_UNSUPP 2
+
+/*
+ * Virtio pvclock set pvclock page request. Sets up the shared memory
+ * pvclock_vsyscall_time_info struct.
+ *
+ * Note that VIRTIO_PVCLOCK_S_OK is 0, so a zero-filled request already reads
+ * as "OK" in its status field.
+ */
+struct virtio_pvclock_set_pvclock_page_req {
+	/* Physical address of pvclock_vsyscall_time_info. */
+	__u64 pvclock_page_pa;
+	/* Current system time. */
+	__u64 system_time;
+	/* Current tsc value. */
+	__u64 tsc_timestamp;
+	/* Status of this request, one of VIRTIO_PVCLOCK_S_*. */
+	__u8 status;
+	/* Explicit padding after the one-byte status. */
+	__u8 padding[7];
+};
+
+#endif /* _LINUX_VIRTIO_PVCLOCK_H */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d9b48f7..9e49c59 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1744,6 +1744,7 @@
/* signal hrtimers about time change */
clock_was_set();
}
+EXPORT_SYMBOL_GPL(timekeeping_inject_sleeptime64);
#endif
/**
diff --git a/kiwi.fragment b/kiwi.fragment
new file mode 100644
index 0000000..6e411cb
--- /dev/null
+++ b/kiwi.fragment
@@ -0,0 +1,2 @@
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_PVCLOCK=m
\ No newline at end of file
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index 26de020..31019bb 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -6,6 +6,9 @@
*
* Copyright (C) 2012, Red Hat, Inc. Rafael Aquini <aquini@redhat.com>
*/
+#include "linux/gfp.h"
+#include "linux/jump_label.h"
+#include "linux/types.h"
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/export.h>
@@ -119,13 +122,19 @@
* Driver must call balloon_page_enqueue before definitively removing the page
* from the guest system.
*
+ * @order: order to use when allocating a page. 0 => 4k, otherwise __GFP_COMP
+ * is used to allocate a hugepage.
* Return: struct page for the allocated page or NULL on allocation failure.
*/
-struct page *balloon_page_alloc(void)
+struct page *balloon_page_alloc(int order)
{
- struct page *page = alloc_page(balloon_mapping_gfp_mask() |
- __GFP_NOMEMALLOC | __GFP_NORETRY |
- __GFP_NOWARN);
+ struct page *page;
+ gfp_t gfp_mask = balloon_mapping_gfp_mask() | __GFP_NOMEMALLOC |
+ __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_INIT_ON_ALLOC;
+ if (order != 0) {
+ gfp_mask |= __GFP_COMP;
+ }
+ page = alloc_pages(gfp_mask, order);
return page;
}
EXPORT_SYMBOL_GPL(balloon_page_alloc);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index a50dae2..daf6935 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -660,6 +660,7 @@
{ "__GFP_RECLAIM", "R" },
{ "__GFP_DIRECT_RECLAIM", "DR" },
{ "__GFP_KSWAPD_RECLAIM", "KR" },
+ { "__GFP_NO_INIT_ON_ALLOC", "NIA" },
};
static size_t max_gfp_len;