UPSTREAM: crosvm: snapshot-restore support for kvm clock

When kvm clock is available, Linux (6.1 at least) uses it for the
scheduling clock (among other things) even when the TSC is configured as
the primary clock source. When restoring a VM, if we don't make a
KVM_SET_CLOCK call, kvm clock's value will roll back to zero and cause
trouble.

`ClockState::flags` was deleted because it had no existing uses, it
isn't usable in any hypervisor-generic way, and it would result in
incorrect behavior for snapshotting: the set of valid flags returned
by KVM_GET_CLOCK is not the same as the set of valid flags that can be
passed to KVM_SET_CLOCK.

Test: snapshot and restore CF
Bug: 339527192
Change-Id: I57503e72499a44e98e0c6ca93eb8cb01cb21c16d
Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/5530677
Reviewed-by: Elie Kheirallah <khei@google.com>
Commit-Queue: Frederick Mayle <fmayle@google.com>
diff --git a/e2e_tests/tests/suspend_resume.rs b/e2e_tests/tests/suspend_resume.rs
index c814d39..6e64561 100644
--- a/e2e_tests/tests/suspend_resume.rs
+++ b/e2e_tests/tests/suspend_resume.rs
@@ -32,6 +32,9 @@
         .arg("vcpu*")
         .arg("--exclude")
         .arg("irqchip")
+        // KVM's pvclock seems to advance some even if the vCPUs haven't started yet.
+        .arg("--exclude")
+        .arg("pvclock")
         .arg(a)
         .arg(b)
         .output()
diff --git a/hypervisor/src/kvm/x86_64.rs b/hypervisor/src/kvm/x86_64.rs
index 65f3701..31d9507 100644
--- a/hypervisor/src/kvm/x86_64.rs
+++ b/hypervisor/src/kvm/x86_64.rs
@@ -1245,7 +1245,6 @@
     fn from(state: &ClockState) -> Self {
         kvm_clock_data {
             clock: state.clock,
-            flags: state.flags,
             ..Default::default()
         }
     }
@@ -1255,7 +1254,6 @@
     fn from(clock_data: &kvm_clock_data) -> Self {
         ClockState {
             clock: clock_data.clock,
-            flags: clock_data.flags,
         }
     }
 }
diff --git a/hypervisor/src/lib.rs b/hypervisor/src/lib.rs
index fb98127..247af37 100644
--- a/hypervisor/src/lib.rs
+++ b/hypervisor/src/lib.rs
@@ -521,12 +521,10 @@
 }
 
 /// The state of the paravirtual clock.
-#[derive(Debug, Default, Copy, Clone)]
+#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
 pub struct ClockState {
     /// Current pv clock timestamp, as seen by the guest
     pub clock: u64,
-    /// Hypervisor-specific feature flags for the pv clock
-    pub flags: u32,
 }
 
 /// The MPState represents the state of a processor.
diff --git a/src/crosvm/sys/linux.rs b/src/crosvm/sys/linux.rs
index 0060f23..ec00664 100644
--- a/src/crosvm/sys/linux.rs
+++ b/src/crosvm/sys/linux.rs
@@ -569,7 +569,8 @@
 
     #[cfg(feature = "balloon")]
     if let (Some(balloon_device_tube), Some(dynamic_mapping_device_tube)) =
-        (balloon_device_tube, dynamic_mapping_device_tube) {
+        (balloon_device_tube, dynamic_mapping_device_tube)
+    {
         let balloon_features = (cfg.balloon_page_reporting as u64)
             << BalloonFeatures::PageReporting as u64
             | (cfg.balloon_ws_reporting as u64) << BalloonFeatures::WSReporting as u64;
@@ -2971,6 +2972,7 @@
         }
         _ => {
             let response = request.execute(
+                &state.linux.vm,
                 &mut run_mode_opt,
                 state.disk_host_tubes,
                 &mut state.linux.pm,
@@ -3589,6 +3591,7 @@
     if let Some(path) = &cfg.restore_path {
         vm_control::do_restore(
             path.clone(),
+            &linux.vm,
             |msg| vcpu::kick_all_vcpus(&vcpu_handles, linux.irq_chip.as_irq_chip(), msg),
             |msg, index| {
                 vcpu::kick_vcpu(&vcpu_handles.get(index), linux.irq_chip.as_irq_chip(), msg)
diff --git a/src/sys/windows.rs b/src/sys/windows.rs
index 4240be2..29497dd 100644
--- a/src/sys/windows.rs
+++ b/src/sys/windows.rs
@@ -884,6 +884,7 @@
         let mut run_mode_opt = None;
         let vcpu_size = vcpu_boxes.lock().len();
         let resp = request.execute(
+            &guest_os.vm,
             &mut run_mode_opt,
             disk_host_tubes,
             &mut guest_os.pm,
@@ -1434,6 +1435,7 @@
     if let Some(path) = restore_path {
         vm_control::do_restore(
             path,
+            &guest_os.vm,
             |msg| {
                 kick_all_vcpus(
                     run_mode_arc.as_ref(),
diff --git a/vm_control/src/lib.rs b/vm_control/src/lib.rs
index 895664f..cf1608f 100644
--- a/vm_control/src/lib.rs
+++ b/vm_control/src/lib.rs
@@ -71,6 +71,7 @@
 use hypervisor::IrqSource;
 pub use hypervisor::MemSlot;
 use hypervisor::Vm;
+use hypervisor::VmCap;
 use libc::EINVAL;
 use libc::EIO;
 use libc::ENODEV;
@@ -1609,6 +1610,7 @@
     /// received this `VmRequest`.
     pub fn execute(
         &self,
+        vm: &impl Vm,
         run_mode: &mut Option<VmRunMode>,
         disk_host_tubes: &[Tube],
         pm: &mut Option<Arc<Mutex<dyn PmResource + Send>>>,
@@ -1960,6 +1962,7 @@
                 info!("Starting crosvm snapshot");
                 match do_snapshot(
                     snapshot_path.to_path_buf(),
+                    vm,
                     kick_vcpus,
                     irq_handler_control,
                     device_control_tube,
@@ -1997,6 +2000,7 @@
 /// Snapshot the VM to file at `snapshot_path`
 fn do_snapshot(
     snapshot_path: PathBuf,
+    vm: &impl Vm,
     kick_vcpus: impl Fn(VcpuControl),
     irq_handler_control: &Tube,
     device_control_tube: &Tube,
@@ -2053,6 +2057,14 @@
 
     let snapshot_writer = SnapshotWriter::new(snapshot_path, encrypt)?;
 
+    // Snapshot hypervisor's paravirtualized clock.
+    let pvclock_snapshot = if vm.check_capability(VmCap::PvClock) {
+        serde_json::to_value(vm.get_pvclock()?)?
+    } else {
+        serde_json::Value::Null
+    };
+    snapshot_writer.write_fragment("pvclock", &pvclock_snapshot)?;
+
     // Snapshot Vcpus
     info!("VCPUs snapshotting...");
     let (send_chan, recv_chan) = mpsc::channel();
@@ -2101,6 +2113,7 @@
 /// because not all the `VmRequest::execute` arguments are available in the "cold restore" flow.
 pub fn do_restore(
     restore_path: PathBuf,
+    vm: &impl Vm,
     kick_vcpus: impl Fn(VcpuControl),
     kick_vcpu: impl Fn(VcpuControl, usize),
     irq_handler_control: &Tube,
@@ -2114,6 +2127,14 @@
 
     let snapshot_reader = SnapshotReader::new(restore_path, require_encrypted)?;
 
+    // Restore hypervisor's paravirtualized clock.
+    let pvclock_snapshot: serde_json::Value = snapshot_reader.read_fragment("pvclock")?;
+    if vm.check_capability(VmCap::PvClock) {
+        vm.set_pvclock(&serde_json::from_value(pvclock_snapshot)?)?;
+    } else {
+        anyhow::ensure!(pvclock_snapshot == serde_json::Value::Null);
+    };
+
     // Restore IrqChip
     let irq_snapshot: serde_json::Value = snapshot_reader.read_fragment("irqchip")?;
     restore_irqchip(irq_snapshot)?;