// SPDX-License-Identifier: GPL-2.0-only

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/uaccess.h>

#include <kvm/arm_vgic.h>

#include <asm/kvm_arm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_nested.h>

#include "vgic.h"

#define ICH_LRN(n)	(ICH_LR0_EL2 + (n))
#define ICH_AP0RN(n)	(ICH_AP0R0_EL2 + (n))
#define ICH_AP1RN(n)	(ICH_AP1R0_EL2 + (n))

struct mi_state {
	u16	eisr;
	u16	elrsr;
	bool	pend;
};

/*
 * The shadow registers loaded to the hardware when running an L2 guest
 * with the virtual IMO/FMO bits set.
 */
struct shadow_if {
	struct vgic_v3_cpu_if	cpuif;
	unsigned long		lr_map;
};

static DEFINE_PER_CPU(struct shadow_if, shadow_if);

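/*
 * lr_map tracks which L1 LRs are loaded into (compacted) shadow LRs.
 * The shadow index of L1 LR @idx is the number of set bits below it in
 * the map. For example (illustrative values only): with lr_map == 0b1011,
 * L1 LRs 0, 1 and 3 occupy shadow LRs 0, 1 and 2, so idx == 3 yields
 * hweight16(0b0011) == 2.
 */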
static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
{
	return hweight16(shadow_if->lr_map & (BIT(idx) - 1));
}

/*
 * Nesting GICv3 support
 *
 * On a non-nesting VM (only running at EL0/EL1), the host hypervisor
 * completely controls the interrupts injected via the list registers.
 * Consequently, most of the state that is modified by the guest (by ACK-ing
 * and EOI-ing interrupts) is synced by KVM on each entry/exit, so that we
 * keep a semi-consistent view of the interrupts.
 *
 * This still applies to an NV guest, but only while "InHost" (either
 * running at EL2, or at EL0 with HCR_EL2.{E2H,TGE} == {1,1}).
 *
 * When running an L2 guest ("not InHost"), things are radically different,
 * as the L1 guest is in charge of provisioning the interrupts via its own
 * view of the ICH_LR*_EL2 registers, which conveniently live in the VNCR
 * page. This means that the flow described above does not apply (there is
 * no state to rebuild in the L0 hypervisor), and that most things happen
 * on L2 load/put:
 *
 * - on L2 load: move the in-memory L1 vGIC configuration into a shadow,
 *   per-CPU data structure that is used to populate the actual LRs. This is
 *   an extra copy that we could avoid, but life is short. In the process,
 *   we remap any interrupt that has the HW bit set to the mapped interrupt
 *   on the host, should the host consider it a HW one. This allows the HW
 *   deactivation to take its course, such as for the timer.
 *
 * - on L2 put: perform the inverse transformation, so that the result of L2
 *   running becomes visible to L1 in the VNCR-accessible registers.
 *
 * - there is nothing to do on L2 entry, as everything will have happened
 *   on load. However, this is the point where we detect an interrupt
 *   targeting L1 and prepare the grand switcheroo.
 *
 * - on L2 exit: emulate the HW bit, and deactivate the corresponding L1
 *   interrupt. The L0 active state will be cleared by the HW if the L1
 *   interrupt was itself backed by a HW interrupt.
 *
 * Maintenance Interrupt (MI) management:
 *
 * Since the L2 guest runs the vgic in its full glory, MIs get delivered and
 * used as a handover point between L2 and L1.
 *
 * - on delivery of an MI to L0 while L2 is running: make the L1 MI pending,
 *   and let it rip. This will initiate a vcpu_put() on L2, and allow L1 to
 *   run and process the MI.
 *
 * - the L1 MI is a fully virtual interrupt, not linked to the host's MI. Its
 *   state must be computed at each entry/exit of the guest, much like we do
 *   for the PMU interrupt.
 *
 * - because most of the ICH_*_EL2 registers live in the VNCR page, the
 *   quality of emulation is poor: L1 can set up the vgic so that an MI would
 *   immediately fire, and not observe anything until the next exit. Trying
 *   to read ICH_MISR_EL2 would do the trick, for example.
 *
 * System register emulation:
 *
 * We get two classes of registers:
 *
 * - those backed by memory (LRs, APRs, HCR, VMCR): L1 can freely access
 *   them, and L0 doesn't see a thing.
 *
 * - those that always trap (ELRSR, EISR, MISR): these are status registers
 *   that are built on the fly based on the in-memory state.
 *
 * Only L1 can access the ICH_*_EL2 registers. A non-NV L2 obviously cannot,
 * and an NV L2 would either access the VNCR page provided by L1 (memory
 * based registers), or see the access redirected to L1 (registers that
 * trap) thanks to NV being set by L1.
 */

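/*
 * A vcpu has nested vgic state when it runs in a nested context (L2)
 * and L1 has set the virtual IMO/FMO bits, i.e. L1 owns interrupt
 * delivery for its guest. IMO and FMO are expected to be set together;
 * split virtual IRQ/FIQ routing is not supported.
 */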
bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
{
	u64 xmo;

	if (is_nested_ctxt(vcpu)) {
		xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO);
		WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO),
			  "Separate virtual IRQ/FIQ settings not supported\n");

		return !!xmo;
	}

	return false;
}

static struct shadow_if *get_shadow_if(void)
{
	return this_cpu_ptr(&shadow_if);
}

static bool lr_triggers_eoi(u64 lr)
{
	return !(lr & (ICH_LR_STATE | ICH_LR_HW)) && (lr & ICH_LR_EOI);
}

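/*
 * Compute the MI-relevant state from the in-memory copy of the L1 LRs,
 * mirroring what the HW would report: an invalid LR with the EOI bit
 * set contributes to EISR, any LR carrying no state contributes to
 * ELRSR, and a pending LR flags a pending interrupt.
 */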
static void vgic_compute_mi_state(struct kvm_vcpu *vcpu, struct mi_state *mi_state)
{
	u16 eisr = 0, elrsr = 0;
	bool pend = false;

	for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
		u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));

		if (lr_triggers_eoi(lr))
			eisr |= BIT(i);
		if (!(lr & ICH_LR_STATE))
			elrsr |= BIT(i);
		pend |= (lr & ICH_LR_PENDING_BIT);
	}

	mi_state->eisr	= eisr;
	mi_state->elrsr	= elrsr;
	mi_state->pend	= pend;
}

u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu)
{
	struct mi_state mi_state;

	vgic_compute_mi_state(vcpu, &mi_state);
	return mi_state.eisr;
}

u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu)
{
	struct mi_state mi_state;

	vgic_compute_mi_state(vcpu, &mi_state);
	return mi_state.elrsr;
}

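/*
 * Build L1's view of ICH_MISR_EL2 from the in-memory ICH_HCR_EL2 and
 * ICH_VMCR_EL2 values and the LR-derived state, setting each status
 * bit when its enable bit in ICH_HCR_EL2 matches the computed
 * condition (EOI, underflow, LRENP, no-pending, group enable/disable).
 */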
u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
{
	struct mi_state mi_state;
	u64 reg = 0, hcr, vmcr;

	hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
	vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);

	vgic_compute_mi_state(vcpu, &mi_state);

	if (mi_state.eisr)
		reg |= ICH_MISR_EL2_EOI;

	if (hcr & ICH_HCR_EL2_UIE) {
		int used_lrs = kvm_vgic_global_state.nr_lr;

		used_lrs -= hweight16(mi_state.elrsr);
		reg |= (used_lrs <= 1) ? ICH_MISR_EL2_U : 0;
	}

	if ((hcr & ICH_HCR_EL2_LRENPIE) && FIELD_GET(ICH_HCR_EL2_EOIcount_MASK, hcr))
		reg |= ICH_MISR_EL2_LRENP;

	if ((hcr & ICH_HCR_EL2_NPIE) && !mi_state.pend)
		reg |= ICH_MISR_EL2_NP;

	if ((hcr & ICH_HCR_EL2_VGrp0EIE) && (vmcr & ICH_VMCR_ENG0_MASK))
		reg |= ICH_MISR_EL2_VGrp0E;

	if ((hcr & ICH_HCR_EL2_VGrp0DIE) && !(vmcr & ICH_VMCR_ENG0_MASK))
		reg |= ICH_MISR_EL2_VGrp0D;

	if ((hcr & ICH_HCR_EL2_VGrp1EIE) && (vmcr & ICH_VMCR_ENG1_MASK))
		reg |= ICH_MISR_EL2_VGrp1E;

	if ((hcr & ICH_HCR_EL2_VGrp1DIE) && !(vmcr & ICH_VMCR_ENG1_MASK))
		reg |= ICH_MISR_EL2_VGrp1D;

	return reg;
}

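/*
 * Turn the L1-provided pINTID into the host's physical INTID backing
 * the mapped interrupt, or clear the HW bit if no valid mapping exists,
 * so that HW-assisted deactivation acts on the right host interrupt.
 */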
static u64 translate_lr_pintid(struct kvm_vcpu *vcpu, u64 lr)
{
	struct vgic_irq *irq;

	if (!(lr & ICH_LR_HW))
		return lr;

	/* We have the HW bit set, check for validity of pINTID */
	irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
	/* If there was no real mapping, nuke the HW bit */
	if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI)
		lr &= ~ICH_LR_HW;

	/* Translate the virtual mapping to the real one, even if invalid */
	if (irq) {
		lr &= ~ICH_LR_PHYS_ID_MASK;
		lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
		vgic_put_irq(vcpu->kvm, irq);
	}

	return lr;
}

/*
 * For LRs that have the HW bit set, such as timer interrupts, we modify
 * them to have the host hardware interrupt number instead of the virtual
 * one programmed by the guest hypervisor.
 */
static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
				     struct vgic_v3_cpu_if *s_cpu_if)
{
	struct shadow_if *shadow_if;

	shadow_if = container_of(s_cpu_if, struct shadow_if, cpuif);
	shadow_if->lr_map = 0;

	for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
		u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));

		if (!(lr & ICH_LR_STATE))
			continue;

		lr = translate_lr_pintid(vcpu, lr);

		s_cpu_if->vgic_lr[hweight16(shadow_if->lr_map)] = lr;
		shadow_if->lr_map |= BIT(i);
	}

	s_cpu_if->used_lrs = hweight16(shadow_if->lr_map);
}

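/*
 * Called on L2 exit: for each HW LR that L1 programmed, sample the
 * shadow LR that actually ran and, if it no longer carries any state,
 * propagate the deactivation to the L1 interrupt backing it.
 */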
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
{
	struct shadow_if *shadow_if = get_shadow_if();
	int i;

	for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
		u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
		struct vgic_irq *irq;

		if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
			continue;

		/*
		 * If we had a HW lr programmed by the guest hypervisor, we
		 * need to emulate the HW effect between the guest hypervisor
		 * and the nested guest.
		 */
		irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
		if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
			continue;

		lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
		if (!(lr & ICH_LR_STATE))
			irq->active = false;

		vgic_put_irq(vcpu->kvm, irq);
	}
}

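/*
 * Populate the shadow cpuif from the VNCR-backed L1 registers (HCR,
 * VMCR, APRs and LRs), folding in any trap bits the host requires for
 * broken vgic implementations.
 */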
static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu,
					struct vgic_v3_cpu_if *s_cpu_if)
{
	struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3;
	u64 val = 0;
	int i;

	/*
	 * If we're on a system with a broken vgic that requires
	 * trapping, propagate the trapping requirements.
	 *
	 * Ah, the smell of rotten fruits...
	 */
	if (static_branch_unlikely(&vgic_v3_cpuif_trap))
		val = host_if->vgic_hcr & (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
					   ICH_HCR_EL2_TC | ICH_HCR_EL2_TDIR);
	s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) | val;
	s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);
	s_cpu_if->vgic_sre = host_if->vgic_sre;

	for (i = 0; i < 4; i++) {
		s_cpu_if->vgic_ap0r[i] = __vcpu_sys_reg(vcpu, ICH_AP0RN(i));
		s_cpu_if->vgic_ap1r[i] = __vcpu_sys_reg(vcpu, ICH_AP1RN(i));
	}

	vgic_v3_create_shadow_lr(vcpu, s_cpu_if);
}

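/*
 * Called on L2 load: build the shadow state from L1's VNCR-backed
 * registers and load it into the HW, so that L2 runs with the vgic
 * configuration that L1 has provisioned.
 */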
void vgic_v3_load_nested(struct kvm_vcpu *vcpu)
{
	struct shadow_if *shadow_if = get_shadow_if();
	struct vgic_v3_cpu_if *cpu_if = &shadow_if->cpuif;

	BUG_ON(!vgic_state_is_nested(vcpu));

	vgic_v3_create_shadow_state(vcpu, cpu_if);

	__vgic_v3_restore_vmcr_aprs(cpu_if);
	__vgic_v3_activate_traps(cpu_if);

	__vgic_v3_restore_state(cpu_if);

	/*
	 * Propagate the number of used LRs for the benefit of the HYP
	 * GICv3 emulation code. Yes, this is a pretty sorry hack.
	 */
	vcpu->arch.vgic_cpu.vgic_v3.used_lrs = cpu_if->used_lrs;
}

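/*
 * Called on L2 put: perform the inverse of vgic_v3_load_nested(), so
 * that the result of L2 running (EOIcount, VMCR, APRs and the LR state
 * bits) becomes visible to L1 in the VNCR-accessible registers.
 */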
void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
{
	struct shadow_if *shadow_if = get_shadow_if();
	struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif;
	u64 val;
	int i;

	__vgic_v3_save_vmcr_aprs(s_cpu_if);
	__vgic_v3_deactivate_traps(s_cpu_if);
	__vgic_v3_save_state(s_cpu_if);

	/*
	 * Translate the shadow state HW fields back to the virtual ones
	 * before copying the shadow struct back to the nested one.
	 */
	val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
	val &= ~ICH_HCR_EL2_EOIcount_MASK;
	val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK);
	__vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val);
	__vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr);

	for (i = 0; i < 4; i++) {
		__vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]);
		__vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]);
	}

	for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
		val = __vcpu_sys_reg(vcpu, ICH_LRN(i));

		val &= ~ICH_LR_STATE;
		val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE;

		__vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
	}

	vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0;
}

/*
 * If we exit an L2 VM with a pending maintenance interrupt from the GIC,
 * then we need to forward this to L1 so that it can re-sync the appropriate
 * LRs and sample level triggered interrupts again.
 */
void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu)
{
	bool state = read_sysreg_s(SYS_ICH_MISR_EL2);

	/* This will force a switch back to L1 if the level is high */
	kvm_vgic_inject_irq(vcpu->kvm, vcpu,
			    vcpu->kvm->arch.vgic.mi_intid, state, vcpu);

	sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0);
}

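/*
 * The L1 MI is a purely virtual interrupt: recompute its level on each
 * guest entry/exit from the in-memory state, and raise it only when the
 * vgic is enabled and ICH_MISR_EL2 reports a non-zero status.
 */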
void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu)
{
	bool level;

	level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) &&
		vgic_v3_get_misr(vcpu);
	kvm_vgic_inject_irq(vcpu->kvm, vcpu,
			    vcpu->kvm->arch.vgic.mi_intid, level, vcpu);
}