untrusted comment: verify with openbsd-63-base.pub
RWRxzbLwAd76ZbFkSUVLz0/YVJvgnyl+oAjxDdVzuLX1LBzl1dT7El1MEqJDkT4tgsCezfqr9fpGAa48+mup5ZS7KQNiUaLjKAM=

OpenBSD 6.3 errata 018, August 24, 2018:

The Intel L1TF bug allows a vmm guest to read host memory. Install
the CPU firmware using fw_update(1), and apply this workaround.

Apply by doing:
    signify -Vep /etc/signify/openbsd-63-base.pub -x 018_vmml1tf.patch.sig \
        -m - | (cd /usr/src && patch -p0)

And then rebuild and install a new kernel:
    fw_update
    KK=`sysctl -n kern.osversion | cut -d# -f1`
    cd /usr/src/sys/arch/`machine`/compile/$KK
    make obj
    make config
    make
    make install

Index: sys/arch/amd64/amd64/identcpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v
retrieving revision 1.95.2.2
diff -u -p -r1.95.2.2 identcpu.c
--- sys/arch/amd64/amd64/identcpu.c	30 Jul 2018 14:45:05 -0000	1.95.2.2
+++ sys/arch/amd64/amd64/identcpu.c	21 Aug 2018 21:32:01 -0000
@@ -208,6 +208,7 @@ const struct {
 	{ SEFF0EDX_AVX512_4FMAPS, "AVX512FMAPS" },
 	{ SEFF0EDX_IBRS,	"IBRS,IBPB" },
 	{ SEFF0EDX_STIBP,	"STIBP" },
+	{ SEFF0EDX_L1DF,	"L1DF" },
 	 /* SEFF0EDX_ARCH_CAP (not printed) */
 }, cpu_tpm_eaxfeatures[] = {
 	{ TPM_SENSOR,		"SENSOR" },
@@ -985,6 +986,28 @@ cpu_check_vmm_cap(struct cpu_info *ci)
 		CPUID(CPUID_AMD_SVM_CAP, dummy, dummy, dummy, cap);
 		if (cap & AMD_SVM_NESTED_PAGING_CAP)
 			ci->ci_vmm_flags |= CI_VMM_RVI;
+	}
+
+	/*
+	 * Check "L1 flush on VM entry" (Intel L1TF vuln) semantics
+	 */
+	if (!strcmp(cpu_vendor, "GenuineIntel")) {
+		if (ci->ci_feature_sefflags_edx & SEFF0EDX_L1DF)
+			ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr = 1;
+		else
+			ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr = 0;
+
+		/*
+		 * Certain CPUs may have the vulnerability remedied in
+		 * hardware, check for that and override the setting
+		 * calculated above.
+		 */
+		if (ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP) {
+			msr = rdmsr(MSR_ARCH_CAPABILITIES);
+			if (msr & ARCH_CAPABILITIES_SKIP_L1DFL_VMENTRY)
+				ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr =
+				    VMX_SKIP_L1D_FLUSH;
+		}
 	}
 }
 #endif /* NVMM > 0 */

Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.185.2.1
diff -u -p -r1.185.2.1 vmm.c
--- sys/arch/amd64/amd64/vmm.c	16 Jun 2018 20:37:22 -0000	1.185.2.1
+++ sys/arch/amd64/amd64/vmm.c	21 Aug 2018 20:37:20 -0000
@@ -43,6 +43,8 @@
 
 /* #define VMM_DEBUG */
 
+void *l1tf_flush_region;
+
 #ifdef VMM_DEBUG
 #define DPRINTF(x...)	do { printf(x); } while(0)
 #else
@@ -369,22 +371,38 @@ vmm_attach(struct device *parent, struct
 	rw_init(&sc->vm_lock, "vmlistlock");
 
 	if (sc->nr_ept_cpus) {
-		printf(": VMX/EPT\n");
+		printf(": VMX/EPT");
 		sc->mode = VMM_MODE_EPT;
 	} else if (sc->nr_vmx_cpus) {
-		printf(": VMX\n");
+		printf(": VMX");
 		sc->mode = VMM_MODE_VMX;
 	} else if (sc->nr_rvi_cpus) {
-		printf(": SVM/RVI\n");
+		printf(": SVM/RVI");
 		sc->mode = VMM_MODE_RVI;
 	} else if (sc->nr_svm_cpus) {
-		printf(": SVM\n");
+		printf(": SVM");
 		sc->mode = VMM_MODE_SVM;
 	} else {
-		printf(": unknown\n");
+		printf(": unknown");
 		sc->mode = VMM_MODE_UNKNOWN;
 	}
 
+	if (sc->mode == VMM_MODE_EPT || sc->mode == VMM_MODE_VMX) {
+		if (!(curcpu()->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr)) {
+			l1tf_flush_region = km_alloc(VMX_L1D_FLUSH_SIZE,
+			    &kv_any, &vmm_kp_contig, &kd_waitok);
+			if (!l1tf_flush_region) {
+				printf(" (failing, no memory)");
+				sc->mode = VMM_MODE_UNKNOWN;
+			} else {
+				printf(" (using slow L1TF mitigation)");
+				memset(l1tf_flush_region, 0xcc,
+				    VMX_L1D_FLUSH_SIZE);
+			}
+		}
+	}
+	printf("\n");
+
 	if (sc->mode == VMM_MODE_SVM || sc->mode == VMM_MODE_RVI) {
 		sc->max_vpid = curcpu()->ci_vmm_cap.vcc_svm.svm_max_asid;
 	} else {
@@ -4101,7 +4119,8 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
 			KERNEL_UNLOCK();
 
 		ret = vmx_enter_guest(&vcpu->vc_control_pa,
-		    &vcpu->vc_gueststate, resume);
+		    &vcpu->vc_gueststate, resume,
+		    curcpu()->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr);
 
 		/*
 		 * On exit, interrupts are disabled, and we are running with

Index: sys/arch/amd64/amd64/vmm_support.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm_support.S,v
retrieving revision 1.9
diff -u -p -r1.9 vmm_support.S
--- sys/arch/amd64/amd64/vmm_support.S	30 May 2017 17:49:47 -0000	1.9
+++ sys/arch/amd64/amd64/vmm_support.S	21 Aug 2018 21:02:57 -0000
@@ -16,6 +16,7 @@
  */
 
 #include "assym.h"
+#include <machine/param.h>
 #include <machine/asm.h>
 #include <machine/psl.h>
 #include <machine/specialreg.h>
@@ -137,6 +138,7 @@ _C_LABEL(invept):
 
 _C_LABEL(vmx_enter_guest):
 	movq	%rdx, %r8	/* resume flag */
+	movq	%rcx, %r9	/* L1DF MSR support */
 	testq	%r8, %r8
 	jnz skip_init
 
@@ -221,6 +223,62 @@ skip_init:
 	movq	%rsp, %rax
 	vmwrite	%rax, %rdi		/* Host RSP */
 
+	/*
+	 * Intel L1TF vulnerability fix
+	 *
+	 * Certain Intel CPUs are broken and allow guest VMs to bypass
+	 * EPT entirely as their address harvesting logic treats guest
+	 * PTEs as host physical addresses. Flush L1 Dcache to prevent
+	 * information leakage by command MSR or manually reading a
+	 * bunch of junk in order to fill sizeof(L1 Dcache)*2.
+	 *
+	 * %r9 (inherited from parameter 4 in %rcx earlier)
+	 * determines the flushing requirements
+	 *  0 - use manual "junk read" flush
+	 *  1 - use MSR command
+	 *  2 (VMX_SKIP_L1D_FLUSH) - no flush required on this CPU
+	 */
+	cmpq	$VMX_SKIP_L1D_FLUSH, %r9
+	je	done_flush
+
+	testq	%r9, %r9
+	jz	no_l1df_msr
+
+	/* CPU has command MSR */
+	movq	$MSR_FLUSH_CMD, %rcx
+	xorq	%rdx, %rdx
+	movq	$FLUSH_CMD_L1D_FLUSH, %rax
+	wrmsr
+	jmp	done_flush
+
+no_l1df_msr:
+	xorq	%r9, %r9
+l1df_tlb_loop:
+	/* XXX get the right L1 size from cpuid */
+	cmpq	$VMX_L1D_FLUSH_SIZE, %r9
+	je	l1df_tlb_done
+	movb	l1tf_flush_region(%r9), %al
+	addq	$PAGE_SIZE, %r9
+	jmp	l1df_tlb_loop
+
+l1df_tlb_done:
+	/*
+	 * Serialize: ensure previous TLB loads don't pull PTDs
+	 * or other PA-containing data into the L1D.
+	 */
+	xorq	%rax, %rax
+	cpuid
+
+	xorq	%r9, %r9
+l1df_load_cache:
+	movb	l1tf_flush_region(%r9), %al
+	/* XXX get the right cacheline size from cpuid */
+	addq	$0x40, %r9
+	cmpq	$VMX_L1D_FLUSH_SIZE, %r9
+	jne	l1df_load_cache
+	lfence
+
+done_flush:
 	testq	%r8, %r8
 	jnz	do_resume
 
@@ -234,6 +292,10 @@ skip_init:
 	movq	0x50(%rsi), %r11
 	movq	0x48(%rsi), %r10
 	movq	0x40(%rsi), %r9
+	movq	%rsi, %r8
+	/* XXX get the right cacheline size from cpuid */
+	addq	$0x40, %r8
+	clflush	(%r8)
 	movq	0x38(%rsi), %r8
 	movq	0x30(%rsi), %rbp
 	movq	0x28(%rsi), %rdi
@@ -241,6 +303,7 @@ skip_init:
 	movq	0x18(%rsi), %rcx
 	movq	0x10(%rsi), %rbx
 	movq	0x08(%rsi), %rax
+	clflush	(%rsi)
 	movq	0x00(%rsi), %rsi
 
 	vmlaunch
@@ -256,6 +319,10 @@ do_resume:
 	movq	0x50(%rsi), %r11
 	movq	0x48(%rsi), %r10
 	movq	0x40(%rsi), %r9
+	movq	%rsi, %r8
+	/* XXX get the right cacheline size from cpuid */
+	addq	$0x40, %r8
+	clflush	(%r8)
 	movq	0x38(%rsi), %r8
 	movq	0x30(%rsi), %rbp
 	movq	0x28(%rsi), %rdi
@@ -263,7 +330,9 @@ do_resume:
 	movq	0x18(%rsi), %rcx
 	movq	0x10(%rsi), %rbx
 	movq	0x08(%rsi), %rax
+	clflush	(%rsi)
 	movq	0x00(%rsi), %rsi
+
 	vmresume
 fail_launch_or_resume:
 	/* Failed launch/resume (fell through) */

Index: sys/arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.119.2.1
diff -u -p -r1.119.2.1 cpu.h
--- sys/arch/amd64/include/cpu.h	16 Jun 2018 20:37:22 -0000	1.119.2.1
+++ sys/arch/amd64/include/cpu.h	21 Aug 2018 20:37:20 -0000
@@ -71,6 +71,7 @@ struct vmx {
 	uint32_t	vmx_msr_table_size;
 	uint32_t	vmx_cr3_tgt_count;
 	uint64_t	vmx_vm_func;
+	uint8_t		vmx_has_l1_flush_msr;
 };
 
 /*

Index: sys/arch/amd64/include/specialreg.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/specialreg.h,v
retrieving revision 1.69.2.1
diff -u -p -r1.69.2.1 specialreg.h
--- sys/arch/amd64/include/specialreg.h	16 Jun 2018 20:37:22 -0000	1.69.2.1
+++ sys/arch/amd64/include/specialreg.h	21 Aug 2018 21:32:01 -0000
@@ -219,6 +219,7 @@
 #define SEFF0EDX_AVX512_4FMAPS	0x00000008 /* AVX-512 mult accum single prec */
 #define SEFF0EDX_IBRS		0x04000000 /* IBRS / IBPB Speculation Control */
 #define SEFF0EDX_STIBP		0x08000000 /* STIBP Speculation Control */
+#define SEFF0EDX_L1DF		0x10000000 /* L1D_FLUSH */
 #define SEFF0EDX_ARCH_CAP	0x20000000 /* Has IA32_ARCH_CAPABILITIES MSR */
 
 /*
@@ -338,6 +339,7 @@
 #define MSR_SPEC_CTRL	0x048	/* Speculation Control IBRS / STIBP */
 #define SPEC_CTRL_IBRS		(1ULL << 0)
 #define SPEC_CTRL_STIBP		(1ULL << 1)
+#define SPEC_CTRL_SSBD		(1ULL << 2)
 #define MSR_PRED_CMD	0x049	/* Speculation Control IBPB */
 #define PRED_CMD_IBPB		(1ULL << 0)
 #define MSR_BIOS_UPDT_TRIG 0x079
@@ -354,6 +356,12 @@
 #define MTRRcap_SMRR	0x800	/* bit 11 - SMM range reg supported */
 #define MSR_ARCH_CAPABILITIES	0x10a
 #define ARCH_CAPABILITIES_RDCL_NO	(1 << 0)	/* Meltdown safe */
+#define ARCH_CAPABILITIES_IBRS_ALL	(1 << 1)	/* enhanced IBRS */
+#define ARCH_CAPABILITIES_RSBA		(1 << 2)	/* RSB Alternate */
+#define ARCH_CAPABILITIES_SKIP_L1DFL_VMENTRY	(1 << 3)
+#define ARCH_CAPABILITIES_SSB_NO	(1 << 4)	/* Spec St Byp safe */
+#define MSR_FLUSH_CMD	0x10b
+#define FLUSH_CMD_L1D_FLUSH	(1ULL << 0)
 #define MSR_BBL_CR_ADDR	0x116		/* PII+ only */
 #define MSR_BBL_CR_DECC	0x118		/* PII+ only */
 #define MSR_BBL_CR_CTL	0x119		/* PII+ only */
@@ -1216,6 +1224,9 @@
 
 #define IA32_VMX_MSR_LIST_SIZE_MASK	(7ULL << 25)
 #define IA32_VMX_CR3_TGT_SIZE_MASK	(0x1FFULL << 16)
+
+#define VMX_SKIP_L1D_FLUSH	2
+#define VMX_L1D_FLUSH_SIZE	(64 * 1024)
 
 /*
  * SVM

Index: sys/arch/amd64/include/vmmvar.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
retrieving revision 1.50
diff -u -p -r1.50 vmmvar.h
--- sys/arch/amd64/include/vmmvar.h	29 Nov 2017 02:46:10 -0000	1.50
+++ sys/arch/amd64/include/vmmvar.h	21 Aug 2018 20:37:20 -0000
@@ -869,7 +869,7 @@ int vmwrite(uint64_t, uint64_t);
 int	vmread(uint64_t, uint64_t *);
 void	invvpid(uint64_t, struct vmx_invvpid_descriptor *);
 void	invept(uint64_t, struct vmx_invept_descriptor *);
-int	vmx_enter_guest(uint64_t *, struct vcpu_gueststate *, int);
+int	vmx_enter_guest(uint64_t *, struct vcpu_gueststate *, int, uint8_t);
 int	svm_enter_guest(uint64_t, struct vcpu_gueststate *,
 	    struct region_descriptor *);
 void	start_vmm_on_cpu(struct cpu_info *);
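
For reference, the detection logic in cpu_check_vmm_cap() above keys off
two CPUID leaf 7 (subleaf 0) EDX bits: SEFF0EDX_L1DF (bit 28, the CPU
implements the IA32_FLUSH_CMD MSR) and SEFF0EDX_ARCH_CAP (bit 29, the
IA32_ARCH_CAPABILITIES MSR is present). The standalone sketch below is
not part of the patch; it assumes a GCC or clang toolchain providing
<cpuid.h> and simply reports the same bits from userland. The
IA32_ARCH_CAPABILITIES MSR itself is only readable in ring 0, so the
SKIP_L1DFL_VMENTRY hardware-fixed override cannot be checked this way.

    /*
     * Illustrative sketch only (not from the patch): report the CPUID
     * bits that cpu_check_vmm_cap() uses to pick an L1TF flush strategy.
     */
    #include <stdio.h>
    #include <cpuid.h>      /* GCC/clang __get_cpuid_count() */

    #define SEFF0EDX_L1DF     0x10000000  /* IA32_FLUSH_CMD MSR present */
    #define SEFF0EDX_ARCH_CAP 0x20000000  /* IA32_ARCH_CAPABILITIES MSR */

    int
    main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* Structured extended feature flags: leaf 7, subleaf 0 */
            if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
                    fprintf(stderr, "CPUID leaf 7 not supported\n");
                    return 1;
            }
            printf("L1D_FLUSH command MSR: %s\n", (edx & SEFF0EDX_L1DF) ?
                "yes (fast flush)" : "no (junk-read flush)");
            printf("ARCH_CAPABILITIES MSR: %s\n",
                (edx & SEFF0EDX_ARCH_CAP) ? "yes" : "no");
            return 0;
    }

When the L1D_FLUSH bit is set, the patched vmx_enter_guest() issues a
single wrmsr of FLUSH_CMD_L1D_FLUSH to MSR_FLUSH_CMD before each VM
entry; otherwise it falls back to reading through the 64KB
l1tf_flush_region buffer allocated in vmm_attach() to displace the
entire L1 data cache.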