Linux KVM 的内核实现要么被编译成模块 kvm.ko,要么直接 built-in 到内核镜像中;为叙述方便,下文统一假设它是 kvm.ko。kvm.ko 暴露给用户态的接口是 /dev/kvm 字符设备。在 arch/arm64/configs/defconfig 中定义了:
CONFIG_KVM=y
在arch/arm64/kvm/Makefile中:
# Pull in the architecture-independent KVM object list.
include $(srctree)/virt/kvm/Makefile.kvm

# Build the kvm composite object and descend into hyp/ when CONFIG_KVM is set.
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM) += hyp/

# Per-object compiler flags for sys_regs.o and handle_exit.o.
CFLAGS_sys_regs.o += -Wno-override-init
CFLAGS_handle_exit.o += -Wno-override-init

# arm64-specific objects that make up the kvm composite object.
kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
	 inject_fault.o va_layout.o handle_exit.o config.o \
	 guest.o debug.o reset.o sys_regs.o stacktrace.o \
	 vgic-sys-reg-v3.o fpsimd.o pkvm.o \
	 arch_timer.o trng.o vmid.o emulate-nested.o nested.o at.o \
	 vgic/vgic.o vgic/vgic-init.o \
	 vgic/vgic-irqfd.o vgic/vgic-v2.o \
	 vgic/vgic-v3.o vgic/vgic-v4.o \
	 vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
	 vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
	 vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o \
	 vgic/vgic-v5.o

# ......
可以看到,arch/arm64/kvm/Makefile 通过 include 把内核源码树下的 virt/kvm/Makefile.kvm 也包含了进来。这些 C 源文件最终一起编译、链接成了 kvm.ko。
入口在arch/arm64/kvm/arm.c中:
module_init(kvm_arm_init);/* Initialize Hyp-mode and memory mappings on all CPUs */static __initintkvm_arm_init(void){ int err; bool in_hyp_mode; if (!is_hyp_mode_available()) { kvm_info("HYP mode not available\n"); return -ENODEV; } if (kvm_get_mode() == KVM_MODE_NONE) { kvm_info("KVM disabled from command line\n"); return -ENODEV; } err = kvm_sys_reg_table_init(); in_hyp_mode = is_kernel_in_hyp_mode(); if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || cpus_have_final_cap(ARM64_WORKAROUND_1508412)) kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ "Only trusted guests should be used on this system.\n"); err = kvm_set_ipa_limit(); err = kvm_arm_init_sve(); err = kvm_arm_vmid_alloc_init(); if (!in_hyp_mode) { err = init_hyp_mode(); } err = kvm_init_vector_slots(); err = init_subsystems(); kvm_info("%s%sVHE%s mode initialized successfully\n", in_hyp_mode ? "" : (is_protected_kvm_enabled() ? "Protected " : "Hyp "), in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ? "h" : "n"), cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) ? "+NV2": ""); /* * FIXME: Do something reasonable if kvm_init() fails after pKVM * hypervisor protection is finalized. */ err = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); if (err) goto out_subs; /* * This should be called after initialization is done and failure isn't * possible anymore. */ if (!in_hyp_mode) finalize_init_hyp_mode(); kvm_arm_initialised = true; return 0; }
一个实际运行时的 dmesg 输出示例如下:
[ 0.158367] kvm [1]: IPA Size Limit: 48 bits
[ 0.158406] kvm [1]: GICv3: no GICV resource entry
[ 0.158414] kvm [1]: disabling GICv2 emulation
[ 0.158437] kvm [1]: GIC system register CPU interface enabled
[ 0.158508] kvm [1]: vgic interrupt IRQ9
[ 0.158661] kvm [1]: VHE mode initialized successfully
kvm_arm_init 调用了 kvm_init。kvm_init 定义在 virt/kvm/kvm_main.c 中,它在最后一步通过 misc_register 注册 /dev/kvm:
intkvm_init(unsigned vcpu_size, unsigned vcpu_align, structmodule *module){ int r; int cpu; /* A kmem cache lets us meet the alignment requirements of fx_save. */ if (!vcpu_align) vcpu_align = __alignof__(struct kvm_vcpu); kvm_vcpu_cache = kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align, SLAB_ACCOUNT, offsetof(struct kvm_vcpu, arch), offsetofend(struct kvm_vcpu, stats_id) - offsetof(struct kvm_vcpu, arch), NULL); for_each_possible_cpu(cpu) { if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu), GFP_KERNEL, cpu_to_node(cpu))) { r = -ENOMEM; goto err_cpu_kick_mask; } } r = kvm_irqfd_init(); r = kvm_async_pf_init(); kvm_chardev_ops.owner = module; kvm_vm_fops.owner = module; kvm_vcpu_fops.owner = module; kvm_device_fops.owner = module; kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; kvm_init_debug(); r = kvm_vfio_ops_init(); r = kvm_gmem_init(module); r = kvm_init_virtualization(); /* * Registration _must_ be the very last thing done, as this exposes * /dev/kvm to userspace, i.e. all infrastructure must be setup! */ r = misc_register(&kvm_dev); return 0;}