Skip to content

Commit e6714bd

Browse files
legoaterpaulusmack
authored andcommitted
KVM: PPC: Book3S HV: XIVE: Add a control to dirty the XIVE EQ pages
When migration of a VM is initiated, a first copy of the RAM is transferred to the destination before the VM is stopped, but there is no guarantee that the EQ pages in which the event notifications are queued have not been modified. To make sure migration will capture a consistent memory state, the XIVE device should perform a XIVE quiesce sequence to stop the flow of event notifications and stabilize the EQs. This is the purpose of the KVM_DEV_XIVE_EQ_SYNC control, which also marks the EQ pages dirty to force their transfer. Signed-off-by: Cédric Le Goater <[email protected]> Reviewed-by: David Gibson <[email protected]> Signed-off-by: Paul Mackerras <[email protected]>
1 parent 7b46b61 commit e6714bd

File tree

3 files changed

+115
-0
lines changed

3 files changed

+115
-0
lines changed

Documentation/virtual/kvm/devices/xive.txt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
2323
queues. To be used by kexec and kdump.
2424
Errors: none
2525

26+
1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
27+
Sync all the sources and queues and mark the EQ pages dirty. This is
28+
to make sure that a consistent memory state is captured when
29+
migrating the VM.
30+
Errors: none
31+
2632
2. KVM_DEV_XIVE_GRP_SOURCE (write only)
2733
Initializes a new source in the XIVE device and masks it.
2834
Attributes:
@@ -100,3 +106,26 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
100106
Errors:
101107
-ENOENT: Unknown source number
102108
-EINVAL: Not initialized source number
109+
110+
* Migration:
111+
112+
Saving the state of a VM using the XIVE native exploitation mode
113+
should follow a specific sequence. When the VM is stopped :
114+
115+
1. Mask all sources (PQ=01) to stop the flow of events.
116+
117+
2. Sync the XIVE device with the KVM control KVM_DEV_XIVE_EQ_SYNC to
118+
flush any in-flight event notification and to stabilize the EQs. At
119+
this stage, the EQ pages are marked dirty to make sure they are
120+
transferred in the migration sequence.
121+
122+
3. Capture the state of the source targeting, the EQs configuration
123+
and the state of thread interrupt context registers.
124+
125+
Restore is similar :
126+
127+
1. Restore the EQ configuration first, as targeting depends on it.
128+
2. Restore targeting
129+
3. Restore the thread interrupt contexts
130+
4. Restore the source states
131+
5. Let the vCPU run

arch/powerpc/include/uapi/asm/kvm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,7 @@ struct kvm_ppc_cpu_char {
680680
/* POWER9 XIVE Native Interrupt Controller */
681681
#define KVM_DEV_XIVE_GRP_CTRL 1
682682
#define KVM_DEV_XIVE_RESET 1
683+
#define KVM_DEV_XIVE_EQ_SYNC 2 /* sync sources and queues, mark EQ pages dirty */
683684
#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */
684685
#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */
685686
#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */

arch/powerpc/kvm/book3s_xive_native.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,88 @@ static int kvmppc_xive_reset(struct kvmppc_xive *xive)
681681
return 0;
682682
}
683683

684+
static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
685+
{
686+
int j;
687+
688+
for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
689+
struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
690+
struct xive_irq_data *xd;
691+
u32 hw_num;
692+
693+
if (!state->valid)
694+
continue;
695+
696+
/*
697+
* The struct kvmppc_xive_irq_state reflects the state
698+
* of the EAS configuration and not the state of the
699+
* source. The source is masked setting the PQ bits to
700+
* '-Q', which is what is being done before calling
701+
* the KVM_DEV_XIVE_EQ_SYNC control.
702+
*
703+
* If a source EAS is configured, OPAL syncs the XIVE
704+
* IC of the source and the XIVE IC of the previous
705+
* target if any.
706+
*
707+
* So it should be fine ignoring MASKED sources as
708+
* they have been synced already.
709+
*/
710+
if (state->act_priority == MASKED)
711+
continue;
712+
713+
kvmppc_xive_select_irq(state, &hw_num, &xd);
714+
xive_native_sync_source(hw_num);
715+
xive_native_sync_queue(hw_num);
716+
}
717+
}
718+
719+
static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
720+
{
721+
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
722+
unsigned int prio;
723+
724+
if (!xc)
725+
return -ENOENT;
726+
727+
for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
728+
struct xive_q *q = &xc->queues[prio];
729+
730+
if (!q->qpage)
731+
continue;
732+
733+
/* Mark EQ page dirty for migration */
734+
mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
735+
}
736+
return 0;
737+
}
738+
739+
static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
740+
{
741+
struct kvm *kvm = xive->kvm;
742+
struct kvm_vcpu *vcpu;
743+
unsigned int i;
744+
745+
pr_devel("%s\n", __func__);
746+
747+
mutex_lock(&kvm->lock);
748+
for (i = 0; i <= xive->max_sbid; i++) {
749+
struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
750+
751+
if (sb) {
752+
arch_spin_lock(&sb->lock);
753+
kvmppc_xive_native_sync_sources(sb);
754+
arch_spin_unlock(&sb->lock);
755+
}
756+
}
757+
758+
kvm_for_each_vcpu(i, vcpu, kvm) {
759+
kvmppc_xive_native_vcpu_eq_sync(vcpu);
760+
}
761+
mutex_unlock(&kvm->lock);
762+
763+
return 0;
764+
}
765+
684766
static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
685767
struct kvm_device_attr *attr)
686768
{
@@ -691,6 +773,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
691773
switch (attr->attr) {
692774
case KVM_DEV_XIVE_RESET:
693775
return kvmppc_xive_reset(xive);
776+
case KVM_DEV_XIVE_EQ_SYNC:
777+
return kvmppc_xive_native_eq_sync(xive);
694778
}
695779
break;
696780
case KVM_DEV_XIVE_GRP_SOURCE:
@@ -729,6 +813,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
729813
case KVM_DEV_XIVE_GRP_CTRL:
730814
switch (attr->attr) {
731815
case KVM_DEV_XIVE_RESET:
816+
case KVM_DEV_XIVE_EQ_SYNC:
732817
return 0;
733818
}
734819
break;

0 commit comments

Comments
 (0)