Skip to content

Commit f8e1449

Browse files
Ben Gardon authored and Paolo Bonzini (bonzini)
committed
kvm: x86/mmu: Add access tracking for tdp_mmu
In order to interoperate correctly with the rest of KVM and other Linux subsystems, the TDP MMU must correctly handle various MMU notifiers. The main Linux MM uses the access tracking MMU notifiers for swap and other features. Add hooks to handle the test/flush HVA (range) family of MMU notifiers.

Tested by running kvm-unit-tests and KVM selftests on an Intel Haswell machine. This series introduced no new failures.

This series can be viewed in Gerrit at: https://linux-review.googlesource.com/c/virt/kvm/kvm/+/2538

Signed-off-by: Ben Gardon <[email protected]>
Message-Id: <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
1 parent 063afac commit f8e1449

File tree

3 files changed

+128
-7
lines changed

3 files changed

+128
-7
lines changed

arch/x86/kvm/mmu/mmu.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1558,12 +1558,24 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
15581558

15591559
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
15601560
{
1561-
return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
1561+
int young = false;
1562+
1563+
young = kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
1564+
if (kvm->arch.tdp_mmu_enabled)
1565+
young |= kvm_tdp_mmu_age_hva_range(kvm, start, end);
1566+
1567+
return young;
15621568
}
15631569

15641570
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
15651571
{
1566-
return kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp);
1572+
int young = false;
1573+
1574+
young = kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp);
1575+
if (kvm->arch.tdp_mmu_enabled)
1576+
young |= kvm_tdp_mmu_test_age_hva(kvm, hva);
1577+
1578+
return young;
15671579
}
15681580

15691581
#ifdef MMU_DEBUG

arch/x86/kvm/mmu/tdp_mmu.c

Lines changed: 110 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,18 @@ static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
149149
return sp->role.smm ? 1 : 0;
150150
}
151151

152+
/*
 * Forward a lost accessed bit to the backing pfn: if a present leaf SPTE
 * was marked accessed and the change either clears that bit or points the
 * SPTE at a different pfn, record the access via kvm_set_pfn_accessed().
 */
static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
{
	/* Only present leaf SPTEs carry accessed state worth propagating. */
	if (!is_shadow_present_pte(old_spte) || !is_last_spte(old_spte, level))
		return;

	if (!is_accessed_spte(old_spte))
		return;

	/* Accessed bit cleared, or the SPTE now maps a different pfn. */
	if (!is_accessed_spte(new_spte) ||
	    spte_to_pfn(old_spte) != spte_to_pfn(new_spte))
		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
}
163+
152164
/**
153165
* handle_changed_spte - handle bookkeeping associated with an SPTE change
154166
* @kvm: kvm instance
static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
				u64 old_spte, u64 new_spte, int level)
{
	__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level);
	/* Record any accessed state lost by this change (see acc_track helper). */
	handle_changed_spte_acc_track(old_spte, new_spte, level);
}
264277

265-
/*
 * Write a new value into the SPTE the iterator points at and run the
 * change-tracking bookkeeping.  @record_acc_track selects whether the
 * accessed-bit bookkeeping runs; callers that clear the accessed bit on
 * purpose (aging) pass false so the clear is not itself counted as an
 * access.
 */
static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
				      u64 new_spte, bool record_acc_track)
{
	u64 *root_pt = tdp_iter_root_pt(iter);
	struct kvm_mmu_page *root = sptep_to_sp(root_pt);
	int as_id = kvm_mmu_page_as_id(root);

	/* WRITE_ONCE: the SPTE may be read concurrently (lock-free walkers). */
	WRITE_ONCE(*iter->sptep, new_spte);

	__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
			      iter->level);
	if (record_acc_track)
		handle_changed_spte_acc_track(iter->old_spte, new_spte,
					      iter->level);
}

/* Default setter: accessed-bit bookkeeping enabled. */
static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
				    u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, true);
}

/* Setter for aging paths: skip the accessed-bit bookkeeping. */
static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
						 struct tdp_iter *iter,
						 u64 new_spte)
{
	__tdp_mmu_set_spte(kvm, iter, new_spte, false);
}
277306

278307
#define tdp_root_for_each_pte(_iter, _root, _start, _end) \
	for_each_tdp_pte(_iter, _root->spt, _root->role.level, _start, _end)

/*
 * Like tdp_root_for_each_pte(), but visits only present leaf SPTEs.
 * Non-present and non-leaf entries are skipped via the embedded
 * "continue; else" so the caller's loop body attaches to the else branch.
 */
#define tdp_root_for_each_leaf_pte(_iter, _root, _start, _end)		\
	tdp_root_for_each_pte(_iter, _root, _start, _end)		\
		if (!is_shadow_present_pte(_iter.old_spte) ||		\
		    !is_last_spte(_iter.old_spte, _iter.level))		\
			continue;					\
		else

#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)		\
	for_each_tdp_pte(_iter, __va(_mmu->root_hpa),		\
			 _mmu->shadow_root_level, _start, _end)
@@ -566,3 +602,72 @@ int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
566602
return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
567603
zap_gfn_range_hva_wrapper);
568604
}
605+
606+
/*
 * Mark the SPTEs range of GFNs [start, end) unaccessed and return non-zero
 * if any of the GFNs in the range have been accessed.
 */
static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
			 struct kvm_mmu_page *root, gfn_t start, gfn_t end,
			 unsigned long unused)
{
	struct tdp_iter iter;
	int young = 0;
	u64 new_spte = 0;

	tdp_root_for_each_leaf_pte(iter, root, start, end) {
		/*
		 * If we have a non-accessed entry we don't need to change the
		 * pte.
		 */
		if (!is_accessed_spte(iter.old_spte))
			continue;

		new_spte = iter.old_spte;

		if (spte_ad_enabled(new_spte)) {
			/* Hardware A/D bits in use: clear just the accessed bit. */
			clear_bit((ffs(shadow_accessed_mask) - 1),
				  (unsigned long *)&new_spte);
		} else {
			/*
			 * Capture the dirty status of the page, so that it doesn't get
			 * lost when the SPTE is marked for access tracking.
			 */
			if (is_writable_pte(new_spte))
				kvm_set_pfn_dirty(spte_to_pfn(new_spte));

			new_spte = mark_spte_for_access_track(new_spte);
		}

		/*
		 * Use the no-acc-track setter: the accessed-bit clear done
		 * here must not be forwarded to kvm_set_pfn_accessed() as if
		 * it were a real access.
		 */
		tdp_mmu_set_spte_no_acc_track(kvm, &iter, new_spte);
		young = 1;
	}

	return young;
}
648+
649+
int kvm_tdp_mmu_age_hva_range(struct kvm *kvm, unsigned long start,
650+
unsigned long end)
651+
{
652+
return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
653+
age_gfn_range);
654+
}
655+
656+
/*
 * Report (without modifying anything) whether any present leaf SPTE for
 * @gfn is marked accessed.  Returns 1 on the first accessed SPTE found,
 * 0 otherwise.
 */
static int test_age_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
			struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused,
			unsigned long unused2)
{
	struct tdp_iter iter;
	int young = 0;

	tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) {
		if (is_accessed_spte(iter.old_spte)) {
			young = 1;
			break;
		}
	}

	return young;
}
668+
669+
int kvm_tdp_mmu_test_age_hva(struct kvm *kvm, unsigned long hva)
670+
{
671+
return kvm_tdp_mmu_handle_hva_range(kvm, hva, hva + 1, 0,
672+
test_age_gfn);
673+
}

arch/x86/kvm/mmu/tdp_mmu.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
2121

2222
int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
			      unsigned long end);

/* Accessed-bit (aging) MMU notifier handlers for the TDP MMU. */
int kvm_tdp_mmu_age_hva_range(struct kvm *kvm, unsigned long start,
			      unsigned long end);
int kvm_tdp_mmu_test_age_hva(struct kvm *kvm, unsigned long hva);
2428
#endif /* __KVM_X86_MMU_TDP_MMU_H */

0 commit comments

Comments
 (0)