Loading arch/x86/kvm/mmu.c +119 −75 Original line number Diff line number Diff line Loading @@ -3530,48 +3530,28 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) return !!(spte && (*spte & shadow_accessed_mask)); } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes) static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, const u8 *new, int *bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; union kvm_mmu_page_role mask = { .word = 0 }; struct kvm_mmu_page *sp; struct hlist_node *node; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; unsigned pte_size, page_offset, misaligned, quadrant, offset; int level, npte, r, flooded = 0; bool remote_flush, local_flush, zap_page; /* * If we don't have indirect shadow pages, it means no page is * write-protected, so we can exit simply. */ if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) return; zap_page = remote_flush = local_flush = false; offset = offset_in_page(gpa); pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); u64 gentry; int r; /* * Assume that the pte write on a page table of the same type * as the current vcpu paging mode since we update the sptes only * when they have the same mode. */ if (is_pae(vcpu) && bytes == 4) { if (is_pae(vcpu) && *bytes == 4) { /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ gpa &= ~(gpa_t)7; bytes = 8; r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); *gpa &= ~(gpa_t)7; *bytes = 8; r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8)); if (r) gentry = 0; new = (const u8 *)&gentry; } switch (bytes) { switch (*bytes) { case 4: gentry = *(const u32 *)new; break; Loading @@ -3583,52 +3563,60 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, break; } return gentry; } /* * No need to care whether allocation memory is successful * or not since pte prefetch is skiped if it does not have * enough objects in the cache. * If we're seeing too many writes to a page, it may no longer be a page table, * or we may be forking, in which case it is better to unmap the page. */ mmu_topup_memory_caches(vcpu); spin_lock(&vcpu->kvm->mmu_lock); ++vcpu->kvm->stat.mmu_pte_write; trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn) { bool flooded = false; if (gfn == vcpu->arch.last_pt_write_gfn && !last_updated_pte_accessed(vcpu)) { ++vcpu->arch.last_pt_write_count; if (vcpu->arch.last_pt_write_count >= 3) flooded = 1; flooded = true; } else { vcpu->arch.last_pt_write_gfn = gfn; vcpu->arch.last_pt_write_count = 1; vcpu->arch.last_pte_updated = NULL; } mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { pte_size = sp->role.cr4_pae ? 8 : 4; misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); misaligned |= bytes < 4; if (misaligned || flooded) { return flooded; } /* * Misaligned accesses are too much trouble to fix * up; also, they usually indicate a page is not used * as a page table. * * If we're seeing too many writes to a page, * it may no longer be a page table, or we may be * forking, in which case it is better to unmap the * page. * Misaligned accesses are too much trouble to fix up; also, they usually * indicate a page is not used as a page table. */ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa, int bytes) { unsigned offset, pte_size, misaligned; pgprintk("misaligned: gpa %llx bytes %d role %x\n", gpa, bytes, sp->role.word); zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); ++vcpu->kvm->stat.mmu_flooded; continue; offset = offset_in_page(gpa); pte_size = sp->role.cr4_pae ? 8 : 4; misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); misaligned |= bytes < 4; return misaligned; } page_offset = offset; static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) { unsigned page_offset, quadrant; u64 *spte; int level; page_offset = offset_in_page(gpa); level = sp->role.level; npte = 1; *nspte = 1; if (!sp->role.cr4_pae) { page_offset <<= 1; /* 32->64 */ /* Loading @@ -3639,15 +3627,71 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (level == PT32_ROOT_LEVEL) { page_offset &= ~7; /* kill rounding error */ page_offset <<= 1; npte = 2; *nspte = 2; } quadrant = page_offset >> PAGE_SHIFT; page_offset &= ~PAGE_MASK; if (quadrant != sp->role.quadrant) return NULL; } spte = &sp->spt[page_offset / sizeof(*spte)]; return spte; } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; union kvm_mmu_page_role mask = { .word = 0 }; struct kvm_mmu_page *sp; struct hlist_node *node; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; int npte; bool remote_flush, local_flush, zap_page, flooded, misaligned; /* * If we don't have indirect shadow pages, it means no page is * write-protected, so we can exit simply. */ if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) return; zap_page = remote_flush = local_flush = false; pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes); /* * No need to care whether allocation memory is successful * or not since pte prefetch is skiped if it does not have * enough objects in the cache. */ mmu_topup_memory_caches(vcpu); spin_lock(&vcpu->kvm->mmu_lock); ++vcpu->kvm->stat.mmu_pte_write; trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); flooded = detect_write_flooding(vcpu, gfn); mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { misaligned = detect_write_misaligned(sp, gpa, bytes); if (misaligned || flooded) { zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); ++vcpu->kvm->stat.mmu_flooded; continue; } spte = get_written_sptes(sp, gpa, &npte); if (!spte) continue; local_flush = true; spte = &sp->spt[page_offset / sizeof(*spte)]; while (npte--) { entry = *spte; mmu_page_zap_pte(vcpu->kvm, sp, spte); Loading Loading
arch/x86/kvm/mmu.c +119 −75 Original line number Diff line number Diff line Loading @@ -3530,48 +3530,28 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) return !!(spte && (*spte & shadow_accessed_mask)); } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes) static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, const u8 *new, int *bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; union kvm_mmu_page_role mask = { .word = 0 }; struct kvm_mmu_page *sp; struct hlist_node *node; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; unsigned pte_size, page_offset, misaligned, quadrant, offset; int level, npte, r, flooded = 0; bool remote_flush, local_flush, zap_page; /* * If we don't have indirect shadow pages, it means no page is * write-protected, so we can exit simply. */ if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) return; zap_page = remote_flush = local_flush = false; offset = offset_in_page(gpa); pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); u64 gentry; int r; /* * Assume that the pte write on a page table of the same type * as the current vcpu paging mode since we update the sptes only * when they have the same mode. */ if (is_pae(vcpu) && bytes == 4) { if (is_pae(vcpu) && *bytes == 4) { /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ gpa &= ~(gpa_t)7; bytes = 8; r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); *gpa &= ~(gpa_t)7; *bytes = 8; r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8)); if (r) gentry = 0; new = (const u8 *)&gentry; } switch (bytes) { switch (*bytes) { case 4: gentry = *(const u32 *)new; break; Loading @@ -3583,52 +3563,60 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, break; } return gentry; } /* * No need to care whether allocation memory is successful * or not since pte prefetch is skiped if it does not have * enough objects in the cache. * If we're seeing too many writes to a page, it may no longer be a page table, * or we may be forking, in which case it is better to unmap the page. */ mmu_topup_memory_caches(vcpu); spin_lock(&vcpu->kvm->mmu_lock); ++vcpu->kvm->stat.mmu_pte_write; trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn) { bool flooded = false; if (gfn == vcpu->arch.last_pt_write_gfn && !last_updated_pte_accessed(vcpu)) { ++vcpu->arch.last_pt_write_count; if (vcpu->arch.last_pt_write_count >= 3) flooded = 1; flooded = true; } else { vcpu->arch.last_pt_write_gfn = gfn; vcpu->arch.last_pt_write_count = 1; vcpu->arch.last_pte_updated = NULL; } mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { pte_size = sp->role.cr4_pae ? 8 : 4; misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); misaligned |= bytes < 4; if (misaligned || flooded) { return flooded; } /* * Misaligned accesses are too much trouble to fix * up; also, they usually indicate a page is not used * as a page table. * * If we're seeing too many writes to a page, * it may no longer be a page table, or we may be * forking, in which case it is better to unmap the * page. * Misaligned accesses are too much trouble to fix up; also, they usually * indicate a page is not used as a page table. */ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa, int bytes) { unsigned offset, pte_size, misaligned; pgprintk("misaligned: gpa %llx bytes %d role %x\n", gpa, bytes, sp->role.word); zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); ++vcpu->kvm->stat.mmu_flooded; continue; offset = offset_in_page(gpa); pte_size = sp->role.cr4_pae ? 8 : 4; misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); misaligned |= bytes < 4; return misaligned; } page_offset = offset; static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) { unsigned page_offset, quadrant; u64 *spte; int level; page_offset = offset_in_page(gpa); level = sp->role.level; npte = 1; *nspte = 1; if (!sp->role.cr4_pae) { page_offset <<= 1; /* 32->64 */ /* Loading @@ -3639,15 +3627,71 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (level == PT32_ROOT_LEVEL) { page_offset &= ~7; /* kill rounding error */ page_offset <<= 1; npte = 2; *nspte = 2; } quadrant = page_offset >> PAGE_SHIFT; page_offset &= ~PAGE_MASK; if (quadrant != sp->role.quadrant) return NULL; } spte = &sp->spt[page_offset / sizeof(*spte)]; return spte; } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; union kvm_mmu_page_role mask = { .word = 0 }; struct kvm_mmu_page *sp; struct hlist_node *node; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; int npte; bool remote_flush, local_flush, zap_page, flooded, misaligned; /* * If we don't have indirect shadow pages, it means no page is * write-protected, so we can exit simply. */ if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) return; zap_page = remote_flush = local_flush = false; pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes); /* * No need to care whether allocation memory is successful * or not since pte prefetch is skiped if it does not have * enough objects in the cache. */ mmu_topup_memory_caches(vcpu); spin_lock(&vcpu->kvm->mmu_lock); ++vcpu->kvm->stat.mmu_pte_write; trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); flooded = detect_write_flooding(vcpu, gfn); mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { misaligned = detect_write_misaligned(sp, gpa, bytes); if (misaligned || flooded) { zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); ++vcpu->kvm->stat.mmu_flooded; continue; } spte = get_written_sptes(sp, gpa, &npte); if (!spte) continue; local_flush = true; spte = &sp->spt[page_offset / sizeof(*spte)]; while (npte--) { entry = *spte; mmu_page_zap_pte(vcpu->kvm, sp, spte); Loading