List:       openbsd-tech
Subject:    vmm(4) testers needed: add IPI for vmclear and unlock kernel
From:       Dave Voutila <dave () sisu ! io>
Date:       2021-08-29 21:17:53
Message-ID: 874kb8lycz.fsf () kogelvis ! sisu ! home

tech@,

I'm looking for testers for the following diff that's designed to
address race conditions in vmm(4)'s VMCS state handling.

In short, when a guest vcpu migrates to another CPU core, there's a
chance the VMCS state hasn't been persisted to memory, so what *is* in
memory doesn't match the state still loaded on the previous CPU. (This
probability increases with more sockets/cores, so if you have a larger
workstation or server, this should still be testable even if you run
only 1-2 guests!)
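
For the curious, the heart of the fix is making the VMCS reload path
aware of which cpu last held the state. Condensed from
vcpu_reload_vmcs_vmx() in the diff below (error printfs and debug
checks trimmed), the logic is roughly:

	ci = curcpu();
	last_ci = vcpu->vc_last_pcpu;

	if (last_ci == NULL) {
		/* First launch: flush the fresh VMCS to a known state. */
		if (vmclear(&vcpu->vc_control_pa))
			return (EINVAL);
	} else if (last_ci != ci) {
		/*
		 * We've moved cpus, so the active VMCS may still be
		 * cached on the old cpu: IPI it to vmclear before
		 * loading the in-memory copy here.
		 */
		if (vmx_remote_vmclear(last_ci, vcpu))
			return (EINVAL);
	}

	/* Load the (now coherent) VMCS on this cpu. */
	if (vmptrld(&vcpu->vc_control_pa))
		return (EINVAL);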

If you use Intel hardware and have experienced spurious guest crashes
with errors similar to:
  vcpu_run_vmx: failed vmlaunch...
or
  vcpu_run_vmx: can't {read,write} procbased ctls on exit
please give this diff a spin. It should remove such crashes.

This diff makes changes that *might affect* AMD SVM users, so even if
you are not an Intel user, I'd appreciate your testing. You should see
no change in behavior, but you will be running different code, so keep
an eye open for regressions!

To summarize what changes, this diff:

- adds a new amd64 IPI that requests a remote CPU issue a vmclear
  instruction to flush the loaded VMCS state from that CPU to memory
  (this requires tracking a physical address on cpu_info, so it also
  adds a vmm-specific lock on the cpu_info; see the sketch after this
  list)
- adds a per-vcpu lock, required for guarding vcpu state to prevent
  corruption due to interrupt requests from vmd(8)'s emulated hardware
  event thread
- unlocks the kernel prior to dispatching the ioctl, relocking on exit
  or only where needed (e.g. uvm changes) (Don't get excited thinking
  this makes things "faster"; it's honestly more helpful for surfacing
  race conditions that have been hiding underneath the lock.)
- simplifies the VMX run loop, pulling some host state persistence out
  of the loop
- removes the IPI kick when a guest injects an interrupt
- removes the yield calls from the vmx and svm run loops; if we
  "should" yield, simply exit to vmd instead of trying to track the cpu

I've tested on my X270 (i7-7500U) and my X13 Gen 1 (AMD Ryzen 5 PRO
4650U). My usual Nix-based/Linux stress test has failed to trigger
crashes.

Please consider compiling with "option VMM_DEBUG" set in your kernel
config. There are extra state corruption checks (via the vmptrst
instruction) that can help us track down any issues seen on Intel
machines by panicking the kernel.
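
Concretely, under VMM_DEBUG the register access paths gain checks like
the following, so a VMCS that isn't actually loaded on the current cpu
panics right away instead of silently corrupting guest state:

	#ifdef VMM_DEBUG
		/* The vcpu's VMCS should already be loaded on this cpu. */
		paddr_t pa = 0UL;
		if (vmptrst(&pa))
			panic("%s: vmptrst", __func__);
		KASSERT(pa == vcpu->vc_control_pa);
	#endif /* VMM_DEBUG */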

TIA,
dv


Index: sys/arch/amd64/amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.153
diff -u -p -r1.153 cpu.c
--- sys/arch/amd64/amd64/cpu.c	11 Mar 2021 11:16:55 -0000	1.153
+++ sys/arch/amd64/amd64/cpu.c	4 Aug 2021 13:31:05 -0000
@@ -789,6 +789,8 @@ cpu_init_vmm(struct cpu_info *ci)
 		if (!pmap_extract(pmap_kernel(), (vaddr_t)ci->ci_vmxon_region,
 		    &ci->ci_vmxon_region_pa))
 			panic("Can't locate VMXON region in phys mem");
+		ci->ci_vmcs_pa = VMX_VMCS_PA_CLEAR;
+		rw_init(&ci->ci_vmcs_lock, "vmcslock");
 	}
 }
 #endif /* NVMM > 0 */
Index: sys/arch/amd64/amd64/ipifuncs.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/ipifuncs.c,v
retrieving revision 1.35
diff -u -p -r1.35 ipifuncs.c
--- sys/arch/amd64/amd64/ipifuncs.c	13 Sep 2020 11:53:16 -0000	1.35
+++ sys/arch/amd64/amd64/ipifuncs.c	4 Aug 2021 13:31:05 -0000
@@ -63,6 +63,7 @@ void x86_64_ipi_halt(struct cpu_info *);
 void x86_64_ipi_wbinvd(struct cpu_info *);

 #if NVMM > 0
+void x86_64_ipi_vmclear_vmm(struct cpu_info *);
 void x86_64_ipi_start_vmm(struct cpu_info *);
 void x86_64_ipi_stop_vmm(struct cpu_info *);
 #endif /* NVMM > 0 */
@@ -85,7 +86,11 @@ void (*ipifunc[X86_NIPI])(struct cpu_inf
 {
 	x86_64_ipi_halt,
 	x86_64_ipi_nop,
+#if NVMM > 0
+	x86_64_ipi_vmclear_vmm,
+#else
 	NULL,
+#endif
 	NULL,
 	x86_64_ipi_reload_pctr,
 	x86_64_ipi_reload_mtrr,
@@ -137,6 +142,12 @@ x86_64_ipi_reload_mtrr(struct cpu_info *
 #endif

 #if NVMM > 0
+void
+x86_64_ipi_vmclear_vmm(struct cpu_info *ci)
+{
+	vmclear_on_cpu(ci);
+}
+
 void
 x86_64_ipi_start_vmm(struct cpu_info *ci)
 {
Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.286
diff -u -p -r1.286 vmm.c
--- sys/arch/amd64/amd64/vmm.c	8 Jun 2021 23:18:43 -0000	1.286
+++ sys/arch/amd64/amd64/vmm.c	4 Aug 2021 13:31:05 -0000
@@ -135,7 +135,7 @@ int vcpu_writeregs_svm(struct vcpu *, ui
 int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
 int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
 int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
-int vcpu_reload_vmcs_vmx(uint64_t *);
+int vcpu_reload_vmcs_vmx(struct vcpu *);
 int vcpu_init(struct vcpu *);
 int vcpu_init_vmx(struct vcpu *);
 int vcpu_init_svm(struct vcpu *);
@@ -210,6 +210,10 @@ void vmm_init_pvclock(struct vcpu *, pad
 int vmm_update_pvclock(struct vcpu *);
 int vmm_pat_is_valid(uint64_t);

+#ifdef MULTIPROCESSOR
+static int vmx_remote_vmclear(struct cpu_info*, struct vcpu *);
+#endif
+
 #ifdef VMM_DEBUG
 void dump_vcpu(struct vcpu *);
 void vmx_vcpu_dump_regs(struct vcpu *);
@@ -470,6 +474,8 @@ vmmioctl(dev_t dev, u_long cmd, caddr_t
 {
 	int ret;

+	KERNEL_UNLOCK();
+
 	switch (cmd) {
 	case VMM_IOC_CREATE:
 		if ((ret = vmm_start()) != 0) {
@@ -514,6 +520,8 @@ vmmioctl(dev_t dev, u_long cmd, caddr_t
 		ret = ENOTTY;
 	}

+	KERNEL_LOCK();
+
 	return (ret);
 }

@@ -629,12 +637,15 @@ vm_resetcpu(struct vm_resetcpu_params *v
 		return (ENOENT);
 	}

+	rw_enter_write(&vcpu->vc_lock);
+
 	if (vcpu->vc_state != VCPU_STATE_STOPPED) {
 		DPRINTF("%s: reset of vcpu %u on vm %u attempted "
 		    "while vcpu was in state %u (%s)\n", __func__,
 		    vrp->vrp_vcpu_id, vrp->vrp_vm_id, vcpu->vc_state,
 		    vcpu_state_decode(vcpu->vc_state));

+		rw_exit_write(&vcpu->vc_lock);
 		return (EBUSY);
 	}

@@ -646,9 +657,11 @@ vm_resetcpu(struct vm_resetcpu_params *v
 #ifdef VMM_DEBUG
 		dump_vcpu(vcpu);
 #endif /* VMM_DEBUG */
+		rw_exit_write(&vcpu->vc_lock);
 		return (EIO);
 	}

+	rw_exit_write(&vcpu->vc_lock);
 	return (0);
 }

@@ -688,25 +701,9 @@ vm_intr_pending(struct vm_intr_params *v
 	if (vcpu == NULL)
 		return (ENOENT);

+	rw_enter_write(&vcpu->vc_lock);
 	vcpu->vc_intr = vip->vip_intr;
-
-#ifdef MULTIPROCESSOR
-	/*
-	 * If the vcpu is running on another PCPU, attempt to force it
-	 * to exit to process the pending interrupt. This could race as
-	 * it could be running when we do the check but be stopped by the
-	 * time we send the IPI. In this case, there is a small extra
-	 * overhead to process the IPI but no other side effects.
-	 *
-	 * There is also a chance that the vcpu may have interrupts blocked.
-	 * That's ok as that condition will be checked on exit, and we will
-	 * simply re-enter the guest. This "fast notification" is done only
-	 * as an optimization.
-	 */
-	if (vcpu->vc_state == VCPU_STATE_RUNNING &&
-	    vip->vip_intr == 1)
-		x86_send_ipi(vcpu->vc_last_pcpu, X86_IPI_NOP);
-#endif /* MULTIPROCESSOR */
+	rw_exit_write(&vcpu->vc_lock);

 	return (0);
 }
@@ -987,6 +984,8 @@ vmx_mprotect_ept(vm_map_t vm_map, paddr_

 	pmap = vm_map->pmap;

+	KERNEL_LOCK();
+
 	for (addr = sgpa; addr < egpa; addr += PAGE_SIZE) {
 		pte = vmx_pmap_find_pte_ept(pmap, addr);
 		if (pte == NULL) {
@@ -1036,6 +1035,8 @@ vmx_mprotect_ept(vm_map_t vm_map, paddr_
 		invept(IA32_VMX_INVEPT_SINGLE_CTX, &vid);
 	}

+	KERNEL_UNLOCK();
+
 	return ret;
 }

@@ -1340,6 +1341,47 @@ stop_vmm_on_cpu(struct cpu_info *ci)
 }

 /*
+ * vmclear_on_cpu
+ *
+ * Flush and clear VMCS on 'ci' by executing vmclear.
+ *
+ */
+void
+vmclear_on_cpu(struct cpu_info *ci)
+{
+	if ((ci->ci_flags & CPUF_VMM) && (ci->ci_vmm_flags & CI_VMM_VMX)) {
+		if (vmclear(&ci->ci_vmcs_pa))
+			panic("VMCLEAR ipi failed");
+		atomic_swap_ulong(&ci->ci_vmcs_pa, VMX_VMCS_PA_CLEAR);
+	}
+}
+
+#ifdef MULTIPROCESSOR
+static int
+vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *vcpu)
+{
+	int ret = 0, nticks = 100000;
+
+	rw_enter_write(&ci->ci_vmcs_lock);
+	atomic_swap_ulong(&ci->ci_vmcs_pa, vcpu->vc_control_pa);
+	x86_send_ipi(ci, X86_IPI_VMCLEAR_VMM);
+
+	while (ci->ci_vmcs_pa != VMX_VMCS_PA_CLEAR) {
+		CPU_BUSY_CYCLE();
+		if (--nticks <= 0) {
+			printf("%s: spun out\n", __func__);
+			ret = 1;
+			break;
+		}
+	}
+	atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
+	rw_exit_write(&ci->ci_vmcs_lock);
+
+	return (ret);
+}
+#endif /* MULTIPROCESSOR */
+
+/*
  * vm_create_check_mem_ranges
  *
  * Make sure that the guest physical memory ranges given by the user process
@@ -1705,35 +1747,45 @@ vm_impl_deinit(struct vm *vm)
 /*
  * vcpu_reload_vmcs_vmx
  *
- * Loads 'vmcs' on the current CPU, possibly flushing any old vmcs state
- * of the previous occupant.
+ * (Re)load the VMCS on the current cpu. Must be called with the VMCS write
+ * lock acquired. If the VMCS is determined to be loaded on a remote cpu, an
+ * ipi will be used to remotely flush it before loading the VMCS locally.
  *
  * Parameters:
- *  vmcs: Pointer to uint64_t containing the PA of the vmcs to load
+ *  vcpu: Pointer to the vcpu needing its VMCS
  *
  * Return values:
  *  0: if successful
  *  EINVAL: an error occurred during flush or reload
  */
 int
-vcpu_reload_vmcs_vmx(uint64_t *vmcs)
+vcpu_reload_vmcs_vmx(struct vcpu *vcpu)
 {
-	uint64_t old;
+	struct cpu_info *ci, *last_ci;
+
+	rw_assert_wrlock(&vcpu->vc_lock);

-	/* Flush any old state */
-	if (!vmptrst(&old)) {
-		if (old != 0xFFFFFFFFFFFFFFFFULL) {
-			if (vmclear(&old))
+	ci = curcpu();
+	last_ci = vcpu->vc_last_pcpu;
+
+	if (last_ci == NULL) {
+		/* First launch */
+		if (vmclear(&vcpu->vc_control_pa))
 				return (EINVAL);
-		}
-	} else
-		return (EINVAL);
+		atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);
+#ifdef MULTIPROCESSOR
+	} else if (last_ci != ci) {
+		/* We've moved CPUs at some point, so remote VMCLEAR */
+		if (vmx_remote_vmclear(last_ci, vcpu))
+			return (EINVAL);
+		KASSERT(vcpu->vc_vmx_vmcs_state == VMCS_CLEARED);
+#endif /* MULTIPROCESSOR */
+	}

-	/*
-	 * Load the VMCS onto this PCPU
-	 */
-	if (vmptrld(vmcs))
+	if (vmptrld(&vcpu->vc_control_pa)) {
+		printf("%s: vmptrld\n", __func__);
 		return (EINVAL);
+	}

 	return (0);
 }
@@ -1765,8 +1817,13 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
 	struct vcpu_segment_info *sregs = vrs->vrs_sregs;
 	struct vmx_msr_store *msr_store;

-	if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
-		return (EINVAL);
+#ifdef VMM_DEBUG
+	/* VMCS should be loaded... */
+	paddr_t pa = 0UL;
+	if (vmptrst(&pa))
+		panic("%s: vmptrst", __func__);
+	KASSERT(pa == vcpu->vc_control_pa);
+#endif /* VMM_DEBUG */

 	if (regmask & VM_RWREGS_GPRS) {
 		gprs[VCPU_REGS_RAX] = vcpu->vc_gueststate.vg_rax;
@@ -1865,8 +1922,6 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
 errout:
 	ret = EINVAL;
 out:
-	if (vmclear(&vcpu->vc_control_pa))
-		ret = EINVAL;
 	return (ret);
 }

@@ -2038,10 +2093,18 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
 	struct vmx_msr_store *msr_store;

 	if (loadvmcs) {
-		if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
+		if (vcpu_reload_vmcs_vmx(vcpu))
 			return (EINVAL);
 	}

+#ifdef VMM_DEBUG
+	/* VMCS should be loaded... */
+	paddr_t pa = 0UL;
+	if (vmptrst(&pa))
+		panic("%s: vmptrst", __func__);
+	KASSERT(pa == vcpu->vc_control_pa);
+#endif /* VMM_DEBUG */
+
 	if (regmask & VM_RWREGS_GPRS) {
 		vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX];
 		vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX];
@@ -2651,7 +2714,7 @@ svm_set_dirty(struct vcpu *vcpu, uint32_
 int
 vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
 {
-	int ret, ug;
+	int ret = 0, ug = 0;
 	uint32_t cr0, cr4;
 	uint32_t pinbased, procbased, procbased2, exit, entry;
 	uint32_t want1, want0;
@@ -2659,16 +2722,24 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
 	uint16_t ctrl, vpid;
 	struct vmx_msr_store *msr_store;

-	ret = 0;
-	ug = 0;
+	rw_assert_wrlock(&vcpu->vc_lock);

 	cr0 = vrs->vrs_crs[VCPU_REGS_CR0];

-	if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa)) {
+	if (vcpu_reload_vmcs_vmx(vcpu)) {
 		DPRINTF("%s: error reloading VMCS\n", __func__);
-		return (EINVAL);
+		ret = EINVAL;
+		goto exit;
 	}

+#ifdef VMM_DEBUG
+	/* VMCS should be loaded... */
+	paddr_t pa = 0UL;
+	if (vmptrst(&pa))
+		panic("%s: vmptrst", __func__);
+	KASSERT(pa == vcpu->vc_control_pa);
+#endif /* VMM_DEBUG */
+
 	/* Compute Basic Entry / Exit Controls */
 	vcpu->vc_vmx_basic = rdmsr(IA32_VMX_BASIC);
 	vcpu->vc_vmx_entry_ctls = rdmsr(IA32_VMX_ENTRY_CTLS);
@@ -3197,14 +3268,14 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
 	/* XXX PAT shadow */
 	vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);

-exit:
 	/* Flush the VMCS */
 	if (vmclear(&vcpu->vc_control_pa)) {
 		DPRINTF("%s: vmclear failed\n", __func__);
 		ret = EINVAL;
-		goto exit;
 	}
+	atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED);

+exit:
 	return (ret);
 }

@@ -3229,13 +3300,12 @@ vcpu_init_vmx(struct vcpu *vcpu)
 {
 	struct vmcs *vmcs;
 	uint32_t cr0, cr4;
-	int ret;
-
-	ret = 0;
+	int ret = 0;

 	/* Allocate VMCS VA */
 	vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
 	    &kd_waitok);
+	vcpu->vc_vmx_vmcs_state = VMCS_CLEARED;

 	if (!vcpu->vc_control_va)
 		return (ENOMEM);
@@ -3389,12 +3459,18 @@ vcpu_init_vmx(struct vcpu *vcpu)
 	}

 	/* VMCS link */
-	if (vmwrite(VMCS_LINK_POINTER, 0xFFFFFFFFFFFFFFFF)) {
+	if (vmwrite(VMCS_LINK_POINTER, VMX_VMCS_PA_CLEAR)) {
 		DPRINTF("%s: error writing VMCS link pointer\n", __func__);
 		ret = EINVAL;
 		goto exit;
 	}

+	/* Flush the initial VMCS */
+	if (vmclear(&vcpu->vc_control_pa)) {
+		DPRINTF("%s: vmclear failed\n", __func__);
+		ret = EINVAL;
+	}
+
 exit:
 	if (ret) {
 		if (vcpu->vc_control_va)
@@ -3412,11 +3488,6 @@ exit:
 		if (vcpu->vc_vmx_msr_entry_load_va)
 			km_free((void *)vcpu->vc_vmx_msr_entry_load_va,
 			    PAGE_SIZE, &kv_page, &kp_zero);
-	} else {
-		if (vmclear(&vcpu->vc_control_pa)) {
-			DPRINTF("%s: vmclear failed\n", __func__);
-			ret = EINVAL;
-		}
 	}

 	return (ret);
@@ -3472,9 +3543,7 @@ vcpu_reset_regs(struct vcpu *vcpu, struc
 int
 vcpu_init_svm(struct vcpu *vcpu)
 {
-	int ret;
-
-	ret = 0;
+	int ret = 0;

 	/* Allocate VMCB VA */
 	vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
@@ -3588,6 +3657,9 @@ vcpu_init(struct vcpu *vcpu)
 	vcpu->vc_state = VCPU_STATE_STOPPED;
 	vcpu->vc_vpid = 0;
 	vcpu->vc_pvclock_system_gpa = 0;
+	vcpu->vc_last_pcpu = NULL;
+
+	rw_init(&vcpu->vc_lock, "vcpulock");

 	/* Shadow PAT MSR, starting with host's value. */
 	vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
@@ -3694,6 +3766,7 @@ vm_teardown(struct vm *vm)
 	struct vcpu *vcpu, *tmp;

 	rw_assert_wrlock(&vmm_softc->vm_lock);
+	KERNEL_LOCK();

 	/* Free VCPUs */
 	rw_enter_write(&vm->vm_vcpu_lock);
@@ -3716,8 +3789,10 @@ vm_teardown(struct vm *vm)
 		if (vmm_softc->vm_ct < 1)
 			vmm_stop();
 	}
-	rw_exit_write(&vm->vm_vcpu_lock);
 	pool_put(&vm_pool, vm);
+
+	KERNEL_UNLOCK();
+	rw_exit_write(&vm->vm_vcpu_lock);
 }

 /*
@@ -4150,11 +4225,11 @@ vm_run(struct vm_run_params *vrp)
 				ret = EBUSY;
 			else
 				atomic_inc_int(&vm->vm_vcpus_running);
-		}
-		rw_exit_read(&vm->vm_vcpu_lock);
-
-		if (vcpu == NULL)
+			rw_enter_write(&vcpu->vc_lock);
+		} else
 			ret = ENOENT;
+
+		rw_exit_read(&vm->vm_vcpu_lock);
 	}
 	rw_exit_read(&vmm_softc->vm_lock);

@@ -4174,6 +4249,7 @@ vm_run(struct vm_run_params *vrp)
 	if (vrp->vrp_continue) {
 		if (copyin(vrp->vrp_exit, &vcpu->vc_exit,
 		    sizeof(struct vm_exit)) == EFAULT) {
+			rw_exit_write(&vcpu->vc_lock);
 			return (EFAULT);
 		}
 	}
@@ -4221,6 +4297,8 @@ vm_run(struct vm_run_params *vrp)
 		vcpu->vc_state = VCPU_STATE_TERMINATED;
 	}

+	rw_exit_write(&vcpu->vc_lock);
+
 	return (ret);
 }

@@ -4262,7 +4340,7 @@ vmm_fpurestore(struct vcpu *vcpu)
 {
 	struct cpu_info *ci = curcpu();

-	/* save vmmd's FPU state if we haven't already */
+	/* save vmm's FPU state if we haven't already */
 	if (ci->ci_flags & CPUF_USERXSTATE) {
 		ci->ci_flags &= ~CPUF_USERXSTATE;
 		fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
@@ -4487,10 +4565,10 @@ vmm_translate_gva(struct vcpu *vcpu, uin
 int
 vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 {
-	int ret = 0, resume = 0, cleared = 1, locked, exitinfo;
+	int ret = 0, exitinfo;
 	struct region_descriptor gdt;
-	struct cpu_info *ci = NULL;
-	uint64_t exit_reason, cr3, vmcs_ptr, insn_error;
+	struct cpu_info *ci = curcpu();
+	uint64_t exit_reason, cr3, insn_error;
 	struct schedstate_percpu *spc;
 	struct vmx_invvpid_descriptor vid;
 	uint64_t eii, procbased, int_st;
@@ -4498,7 +4576,12 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 	u_long s;
 	struct region_descriptor gdtr, idtr;

-	irq = vrp->vrp_irq;
+	rw_assert_wrlock(&vcpu->vc_lock);
+
+	if (vcpu_reload_vmcs_vmx(vcpu)) {
+		printf("%s: failed (re)loading vmcs\n", __func__);
+		return (EINVAL);
+	}

 	/*
 	 * If we are returning from userspace (vmd) because we exited
@@ -4506,6 +4589,8 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 	 * needs to be fixed up depends on what vmd populated in the
 	 * exit data structure.
 	 */
+	irq = vrp->vrp_irq;
+
 	if (vrp->vrp_continue) {
 		switch (vcpu->vc_gueststate.vg_exit_reason) {
 		case VMX_EXIT_IO:
@@ -4513,16 +4598,12 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 				vcpu->vc_gueststate.vg_rax =
 				    vcpu->vc_exit.vei.vei_data;
 			break;
+		case VM_EXIT_NONE:
 		case VMX_EXIT_HLT:
-			break;
 		case VMX_EXIT_INT_WINDOW:
-			break;
 		case VMX_EXIT_EXTINT:
-			break;
 		case VMX_EXIT_EPT_VIOLATION:
-			break;
 		case VMX_EXIT_CPUID:
-			break;
 		case VMX_EXIT_XSETBV:
 			break;
 #ifdef VMM_DEBUG
@@ -4530,12 +4611,6 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 			DPRINTF("%s: vm %d vcpu %d triple fault\n",
 			    __func__, vcpu->vc_parent->vm_id,
 			    vcpu->vc_id);
-			/*
-			 * Ensure we have the proper VMCS loaded before
-			 * dumping register and VMCS state
-			 */
-			if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
-				break;
 			vmx_vcpu_dump_regs(vcpu);
 			dump_vcpu(vcpu);
 			vmx_dump_vmcs(vcpu);
@@ -4545,27 +4620,15 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 			    "due to invalid guest state\n",
 			    __func__, vcpu->vc_parent->vm_id,
 			    vcpu->vc_id);
-			/*
-			 * Ensure we have the proper VMCS loaded before
-			 * dumping register and VMCS state
-			 */
-			if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
-				break;
 			vmx_vcpu_dump_regs(vcpu);
 			dump_vcpu(vcpu);
-			return EINVAL;
+			return (EINVAL);
 		default:
 			DPRINTF("%s: unimplemented exit type %d (%s)\n",
 			    __func__,
 			    vcpu->vc_gueststate.vg_exit_reason,
 			    vmx_exit_reason_decode(
 				vcpu->vc_gueststate.vg_exit_reason));
-			/*
-			 * Ensure we have the proper VMCS loaded before
-			 * dumping register and VMCS state
-			 */
-			if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa))
-				break;
 			vmx_vcpu_dump_regs(vcpu);
 			dump_vcpu(vcpu);
 			break;
@@ -4573,94 +4636,82 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 		}
 	}

-	while (ret == 0) {
-		vmm_update_pvclock(vcpu);
-		if (cleared || ci != curcpu()) {
-			/*
-			 * We are launching for the first time, we yielded the
-			 * pcpu, or we are resuming from a different pcpu, so we
-			 * need to reset certain pcpu-specific values.
-			 */
-			resume = cleared = 0;
-			ci = curcpu();
-			setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1);
+	setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1);
+	if (gdt.rd_base == 0) {
+		printf("%s: setregion\n", __func__);
+		return (EINVAL);
+	}

-			vcpu->vc_last_pcpu = ci;
+	/* Host GDTR base */
+	if (vmwrite(VMCS_HOST_IA32_GDTR_BASE, gdt.rd_base)) {
+		printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__,
+		    VMCS_HOST_IA32_GDTR_BASE, gdt.rd_base);
+		return (EINVAL);
+	}

-			if (vmptrld(&vcpu->vc_control_pa)) {
-				ret = EINVAL;
-				break;
-			}
+	/* Host TR base */
+	if (vmwrite(VMCS_HOST_IA32_TR_BASE, (uint64_t)ci->ci_tss)) {
+		printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__,
+		    VMCS_HOST_IA32_TR_BASE, (uint64_t)ci->ci_tss);
+		return (EINVAL);
+	}

-			if (gdt.rd_base == 0) {
-				ret = EINVAL;
-				break;
-			}
+	/* Host CR3 */
+	cr3 = rcr3();
+	if (vmwrite(VMCS_HOST_IA32_CR3, cr3)) {
+		printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__,
+		    VMCS_HOST_IA32_CR3, cr3);
+		return (EINVAL);
+	}

-			/* Host GDTR base */
-			if (vmwrite(VMCS_HOST_IA32_GDTR_BASE, gdt.rd_base)) {
-				ret = EINVAL;
-				break;
-			}
+	/* Handle vmd(8) injected interrupts */
+	/* Is there an interrupt pending injection? */
+	if (irq != 0xFFFF) {
+		if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST, &int_st)) {
+			printf("%s: can't get interruptibility state\n",
+			    __func__);
+			return (EINVAL);
+		}

-			/* Host TR base */
-			if (vmwrite(VMCS_HOST_IA32_TR_BASE,
-			    (uint64_t)ci->ci_tss)) {
-				ret = EINVAL;
-				break;
+		/* Interruptbility state 0x3 covers NMIs and STI */
+		if (!(int_st & 0x3) && vcpu->vc_irqready) {
+			eii = (irq & 0xFF);
+			eii |= (1ULL << 31);	/* Valid */
+			eii |= (0ULL << 8);	/* Hardware Interrupt */
+			if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
+				printf("vcpu_run_vmx: can't vector "
+				    "interrupt to guest\n");
+				return (EINVAL);
 			}

-			/* Host CR3 */
-			cr3 = rcr3();
-			if (vmwrite(VMCS_HOST_IA32_CR3, cr3)) {
-				ret = EINVAL;
-				break;
-			}
+			irq = 0xFFFF;
 		}
-
-		/* Handle vmd(8) injected interrupts */
-		/* Is there an interrupt pending injection? */
-		if (irq != 0xFFFF) {
-			if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST, &int_st)) {
-				printf("%s: can't get interruptibility state\n",
-				    __func__);
-				ret = EINVAL;
-				break;
+	} else if (!vcpu->vc_intr) {
+		/*
+		 * Disable window exiting
+		 */
+		if (vmread(VMCS_PROCBASED_CTLS, &procbased)) {
+			printf("%s: can't read procbased ctls on exit\n",
+			    __func__);
+			return (EINVAL);
+		} else {
+			procbased &= ~IA32_VMX_INTERRUPT_WINDOW_EXITING;
+			if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) {
+				printf("%s: can't write procbased ctls "
+				    "on exit\n", __func__);
+				return (EINVAL);
 			}
+		}
+	}

-			/* Interruptbility state 0x3 covers NMIs and STI */
-			if (!(int_st & 0x3) && vcpu->vc_irqready) {
-				eii = (irq & 0xFF);
-				eii |= (1ULL << 31);	/* Valid */
-				eii |= (0ULL << 8);	/* Hardware Interrupt */
-				if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
-					printf("vcpu_run_vmx: can't vector "
-					    "interrupt to guest\n");
-					ret = EINVAL;
-					break;
-				}
+	while (ret == 0) {
+#ifdef VMM_DEBUG
+		paddr_t pa = 0UL;
+		vmptrst(&pa);
+		KASSERT(pa == vcpu->vc_control_pa);
+#endif /* VMM_DEBUG */

-				irq = 0xFFFF;
-			}
-		} else if (!vcpu->vc_intr) {
-			/*
-			 * Disable window exiting
-			 */
-			if (vmread(VMCS_PROCBASED_CTLS, &procbased)) {
-				printf("%s: can't read procbased ctls on "
-				    "exit\n", __func__);
-				ret = EINVAL;
-				break;
-			} else {
-				procbased &= ~IA32_VMX_INTERRUPT_WINDOW_EXITING;
-				if (vmwrite(VMCS_PROCBASED_CTLS, procbased)) {
-					printf("%s: can't write procbased ctls "
-					    "on exit\n", __func__);
-					ret = EINVAL;
-					break;
-				}
-			}
-		}
+		vmm_update_pvclock(vcpu);

 		/* Inject event if present */
 		if (vcpu->vc_event != 0) {
@@ -4704,12 +4755,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 			invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB, &vid);
 		}

-		TRACEPOINT(vmm, guest_enter, vcpu, vrp);
-
 		/* Start / resume the VCPU */
-#ifdef VMM_DEBUG
-		KERNEL_ASSERT_LOCKED();
-#endif /* VMM_DEBUG */

 		/* Disable interrupts and save the current host FPU state. */
 		s = intr_disable();
@@ -4722,9 +4768,11 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 		sidt(&idtr);
 		sldt(&ldt_sel);

-		KERNEL_UNLOCK();
+		TRACEPOINT(vmm, guest_enter, vcpu, vrp);
+
 		ret = vmx_enter_guest(&vcpu->vc_control_pa,
-		    &vcpu->vc_gueststate, resume,
+		    &vcpu->vc_gueststate,
+		    (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED),
 		    ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr);

 		bare_lgdt(&gdtr);
@@ -4737,10 +4785,14 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 		 * state before re-enabling interrupts.
 		 */
 		vmm_fpusave(vcpu);
-
 		intr_restore(s);

+		TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason);
+
+		atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_LAUNCHED);
 		exit_reason = VM_EXIT_NONE;
+
+		/* If we exited successfully ... */
 		if (ret == 0) {
 			/*
 			 * ret == 0 implies we entered the guest, and later
@@ -4753,35 +4805,18 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 				printf("%s: can't read guest rflags during "
 				    "exit\n", __func__);
 				ret = EINVAL;
-				KERNEL_LOCK();
 				break;
                         }
-		}
-
-		TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason);
-
-		if (ret || exitinfo != VMX_EXIT_INFO_COMPLETE ||
-		    exit_reason != VMX_EXIT_EXTINT) {
-			KERNEL_LOCK();
-			locked = 1;
-		} else
-			locked = 0;

-		/* If we exited successfully ... */
-		if (ret == 0) {
-			resume = 1;
+			/* Update our state */
 			if (!(exitinfo & VMX_EXIT_INFO_HAVE_RIP)) {
 				printf("%s: cannot read guest rip\n", __func__);
-				if (!locked)
-					KERNEL_LOCK();
 				ret = EINVAL;
 				break;
 			}

 			if (!(exitinfo & VMX_EXIT_INFO_HAVE_REASON)) {
 				printf("%s: cant read exit reason\n", __func__);
-				if (!locked)
-					KERNEL_LOCK();
 				ret = EINVAL;
 				break;
 			}
@@ -4793,25 +4828,6 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 			vcpu->vc_gueststate.vg_exit_reason = exit_reason;
 			ret = vmx_handle_exit(vcpu);

-			/*
-			 * When the guest exited due to an external interrupt,
-			 * we do not yet hold the kernel lock: we need to
-			 * handle interrupts first before grabbing the lock:
-			 * the interrupt handler might do work that
-			 * another CPU holding the kernel lock waits for.
-			 *
-			 * Example: the TLB shootdown code in the pmap module
-			 * sends an IPI to all other CPUs and busy-waits for
-			 * them to decrement tlb_shoot_wait to zero. While
-			 * busy-waiting, the kernel lock is held.
-			 *
-			 * If this code here attempted to grab the kernel lock
-			 * before handling the interrupt, it would block
-			 * forever.
-			 */
-			if (!locked)
-				KERNEL_LOCK();
-
 			if (vcpu->vc_gueststate.vg_rflags & PSL_I)
 				vcpu->vc_irqready = 1;
 			else
@@ -4850,72 +4866,61 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 				break;
 			}

-			/* Check if we should yield - don't hog the cpu */
+			/* Check if we should yield - don't hog the {p,v}pu */
 			spc = &ci->ci_schedstate;
-			if (spc->spc_schedflags & SPCF_SHOULDYIELD) {
-				if (vmclear(&vcpu->vc_control_pa)) {
-					ret = EINVAL;
+			if (spc->spc_schedflags & SPCF_SHOULDYIELD)
 					break;
-				}
-				cleared = 1;
-				yield();
+
+		} else {
+			/*
+			 * We failed vmresume or vmlaunch for some reason,
+			 * typically due to invalid vmcs state or other
+			 * reasons documented in SDM Vol 3C 30.4.
+			 */
+			switch (ret) {
+			case VMX_FAIL_LAUNCH_INVALID_VMCS:
+				printf("%s: failed %s with invalid vmcs\n",
+				    __func__,
+				    (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED
+					? "vmresume" : "vmlaunch"));
+				break;
+			case VMX_FAIL_LAUNCH_VALID_VMCS:
+				printf("%s: failed %s with valid vmcs\n",
+				    __func__,
+				    (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED
+					? "vmresume" : "vmlaunch"));
+				break;
+			default:
+				printf("%s: failed %s for unknown reason\n",
+				    __func__,
+				    (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED
+					? "vmresume" : "vmlaunch"));
 			}
-		} else if (ret == VMX_FAIL_LAUNCH_INVALID_VMCS) {
-			printf("%s: failed launch with invalid vmcs\n",
-			    __func__);
-#ifdef VMM_DEBUG
-			vmx_vcpu_dump_regs(vcpu);
-			dump_vcpu(vcpu);
-#endif /* VMM_DEBUG */
+
 			ret = EINVAL;
-		} else if (ret == VMX_FAIL_LAUNCH_VALID_VMCS) {
-			exit_reason = vcpu->vc_gueststate.vg_exit_reason;
-			printf("%s: failed launch with valid vmcs, code=%lld "
-			    "(%s)\n", __func__, exit_reason,
-			    vmx_instruction_error_decode(exit_reason));

+			/* Try to translate a vmfail error code, if possible. */
 			if (vmread(VMCS_INSTRUCTION_ERROR, &insn_error)) {
 				printf("%s: can't read insn error field\n",
 				    __func__);
 			} else
-				printf("%s: insn error code = %lld\n",
-				    __func__, insn_error);
-#ifdef VMM_DEBUG
-			vmx_vcpu_dump_regs(vcpu);
-			dump_vcpu(vcpu);
-#endif /* VMM_DEBUG */
-			ret = EINVAL;
-		} else {
-			printf("%s: failed %s for unknown reason %d\n",
-			    __func__, resume ? "vmresume" : "vmlaunch", ret);
+				printf("%s: error code = %lld, %s\n", __func__,
+				    insn_error,
+				    vmx_instruction_error_decode(insn_error));
 #ifdef VMM_DEBUG
 			vmx_vcpu_dump_regs(vcpu);
 			dump_vcpu(vcpu);
 #endif /* VMM_DEBUG */
-			ret = EINVAL;
 		}
 	}

+	vcpu->vc_last_pcpu = curcpu();
+
 	/* Copy the VCPU register state to the exit structure */
 	if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, &vcpu->vc_exit.vrs))
 		ret = EINVAL;
 	vcpu->vc_exit.cpl = vmm_get_guest_cpu_cpl(vcpu);
-	/*
-	 * We are heading back to userspace (vmd), either because we need help
-	 * handling an exit, a guest interrupt is pending, or we failed in some
-	 * way to enter the guest. Clear any current VMCS pointer as we may end
-	 * up coming back on a different CPU.
-	 */
-	if (!vmptrst(&vmcs_ptr)) {
-		if (vmcs_ptr != 0xFFFFFFFFFFFFFFFFULL)
-			if (vmclear(&vcpu->vc_control_pa))
-				ret = EINVAL;
-	} else
-		ret = EINVAL;

-#ifdef VMM_DEBUG
-	KERNEL_ASSERT_LOCKED();
-#endif /* VMM_DEBUG */
 	return (ret);
 }

@@ -5247,8 +5252,10 @@ vmx_handle_exit(struct vcpu *vcpu)
 		update_rip = 0;
 		break;
 	default:
+#ifdef VMM_DEBUG
 		DPRINTF("%s: unhandled exit 0x%llx (%s)\n", __func__,
 		    exit_reason, vmx_exit_reason_decode(exit_reason));
+#endif /* VMM_DEBUG */
 		return (EINVAL);
 	}

@@ -5541,8 +5548,10 @@ vmx_fault_page(struct vcpu *vcpu, paddr_
 		return (EAGAIN);
 	}

+	KERNEL_LOCK();
 	ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE,
 	    PROT_READ | PROT_WRITE | PROT_EXEC);
+	KERNEL_UNLOCK();

 	if (ret)
 		printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
@@ -7066,10 +7075,6 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
 		TRACEPOINT(vmm, guest_enter, vcpu, vrp);

 		/* Start / resume the VCPU */
-#ifdef VMM_DEBUG
-		KERNEL_ASSERT_LOCKED();
-#endif /* VMM_DEBUG */
-
 		/* Disable interrupts and save the current host FPU state. */
 		clgi();
 		if ((ret = vmm_fpurestore(vcpu))) {
@@ -7078,8 +7083,6 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
 		}

 		KASSERT(vmcb->v_intercept1 & SVM_INTERCEPT_INTR);
-		KERNEL_UNLOCK();
-
 		wrmsr(MSR_AMD_VM_HSAVE_PA, vcpu->vc_svm_hsa_pa);

 		ret = svm_enter_guest(vcpu->vc_control_pa,
@@ -7097,7 +7100,6 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
 		 * (external interrupt), the interrupt will be processed now.
 		 */
 		stgi();
-		KERNEL_LOCK();

 		vcpu->vc_gueststate.vg_rip = vmcb->v_rip;
 		vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_NONE;
@@ -7153,9 +7155,8 @@ vcpu_run_svm(struct vcpu *vcpu, struct v

 			/* Check if we should yield - don't hog the cpu */
 			spc = &ci->ci_schedstate;
-			if (spc->spc_schedflags & SPCF_SHOULDYIELD) {
-				yield();
-			}
+			if (spc->spc_schedflags & SPCF_SHOULDYIELD)
+				break;
 		}
 	}

@@ -7168,9 +7169,6 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
 	if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL, &vcpu->vc_exit.vrs))
 		ret = EINVAL;

-#ifdef VMM_DEBUG
-	KERNEL_ASSERT_LOCKED();
-#endif /* VMM_DEBUG */
 	return (ret);
 }

@@ -7789,6 +7787,8 @@ vmx_dump_vmcs(struct vcpu *vcpu)
 	/* XXX save and load new vmcs, restore at end */

 	DPRINTF("--CURRENT VMCS STATE--\n");
+	printf("VMCS launched: %s\n",
+	    (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED) ? "Yes" : "No");
 	DPRINTF("VMXON revision : 0x%x\n",
 	    curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision);
 	DPRINTF("CR0 fixed0: 0x%llx\n",
Index: sys/arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.140
diff -u -p -r1.140 cpu.h
--- sys/arch/amd64/include/cpu.h	6 Jul 2021 09:34:06 -0000	1.140
+++ sys/arch/amd64/include/cpu.h	4 Aug 2021 13:31:05 -0000
@@ -49,6 +49,7 @@
 #endif /* _KERNEL */

 #include <sys/device.h>
+#include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/sensors.h>
 #include <sys/srp.h>
@@ -211,6 +212,9 @@ struct cpu_info {
 	int64_t		ci_tsc_skew;		/* counter skew vs cpu0 */

 	char		ci_panicbuf[512];
+
+	paddr_t		ci_vmcs_pa;
+	struct rwlock	ci_vmcs_lock;
 };

 #define CPUF_BSP	0x0001		/* CPU is the original BSP */
Index: sys/arch/amd64/include/intrdefs.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/intrdefs.h,v
retrieving revision 1.21
diff -u -p -r1.21 intrdefs.h
--- sys/arch/amd64/include/intrdefs.h	13 Sep 2020 11:53:16 -0000	1.21
+++ sys/arch/amd64/include/intrdefs.h	4 Aug 2021 13:31:05 -0000
@@ -76,6 +76,7 @@

 #define X86_IPI_HALT			0x00000001
 #define X86_IPI_NOP			0x00000002
+#define X86_IPI_VMCLEAR_VMM		0x00000004
 #define X86_IPI_PCTR			0x00000010
 #define X86_IPI_MTRR			0x00000020
 #define X86_IPI_SETPERF			0x00000040
@@ -84,7 +85,7 @@
 #define X86_IPI_STOP_VMM		0x00000200
 #define X86_IPI_WBINVD			0x00000400

-#define X86_NIPI			11
+#define X86_NIPI			12

 #define IREENT_MAGIC	0x18041969

Index: sys/arch/amd64/include/vmmvar.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
retrieving revision 1.72
diff -u -p -r1.72 vmmvar.h
--- sys/arch/amd64/include/vmmvar.h	20 May 2021 17:33:44 -0000	1.72
+++ sys/arch/amd64/include/vmmvar.h	4 Aug 2021 13:31:05 -0000
@@ -106,6 +106,7 @@
  * VMX: Misc defines
  */
 #define VMX_MAX_CR3_TARGETS			256
+#define VMX_VMCS_PA_CLEAR			0xFFFFFFFFFFFFFFFFUL

 #define VM_EXIT_TERMINATED			0xFFFE
 #define VM_EXIT_NONE				0xFFFF
@@ -902,7 +903,7 @@ struct vcpu {

 	/* VMCS / VMCB pointer */
 	vaddr_t vc_control_va;
-	uint64_t vc_control_pa;
+	paddr_t vc_control_pa;

 	/* VLAPIC pointer */
 	vaddr_t vc_vlapic_va;
@@ -920,6 +921,7 @@ struct vcpu {

 	uint8_t vc_virt_mode;

+	struct rwlock vc_lock;
 	struct cpu_info *vc_last_pcpu;
 	struct vm_exit vc_exit;

@@ -961,6 +963,9 @@ struct vcpu {
 	uint8_t vc_vmx_vpid_enabled;
 	uint64_t vc_vmx_cr0_fixed1;
 	uint64_t vc_vmx_cr0_fixed0;
+	uint32_t vc_vmx_vmcs_state;
+#define VMCS_CLEARED	0
+#define VMCS_LAUNCHED	1

 	/* SVM only */
 	vaddr_t vc_svm_hsa_va;
@@ -974,18 +979,19 @@ SLIST_HEAD(vcpu_head, vcpu);
 void	vmm_dispatch_intr(vaddr_t);
 int	vmxon(uint64_t *);
 int	vmxoff(void);
-int	vmclear(uint64_t *);
-int	vmptrld(uint64_t *);
-int	vmptrst(uint64_t *);
+int	vmclear(paddr_t *);
+int	vmptrld(paddr_t *);
+int	vmptrst(paddr_t *);
 int	vmwrite(uint64_t, uint64_t);
 int	vmread(uint64_t, uint64_t *);
 void	invvpid(uint64_t, struct vmx_invvpid_descriptor *);
 void	invept(uint64_t, struct vmx_invept_descriptor *);
-int	vmx_enter_guest(uint64_t *, struct vcpu_gueststate *, int, uint8_t);
+int	vmx_enter_guest(paddr_t *, struct vcpu_gueststate *, int, uint8_t);
 int	svm_enter_guest(uint64_t, struct vcpu_gueststate *,
     struct region_descriptor *);
 void	start_vmm_on_cpu(struct cpu_info *);
 void	stop_vmm_on_cpu(struct cpu_info *);
+void	vmclear_on_cpu(struct cpu_info *);

 #endif /* _KERNEL */
