// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(c) 2023 Intel Corporation.
 *
 * Intel Trusted Domain Extensions (TDX) support
 */

#define pr_fmt(fmt)	"virt/tdx: " fmt

#include <linux/types.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/cpu.h>
#include <linux/spinlock.h>
#include <linux/percpu-defs.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/minmax.h>
#include <linux/sizes.h>
#include <linux/pfn.h>
#include <linux/align.h>
#include <asm/msr-index.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>
#include <asm/tdx.h>
#include "tdx.h"

static u32 tdx_global_keyid __ro_after_init;
static u32 tdx_guest_keyid_start __ro_after_init;
static u32 tdx_nr_guest_keyids __ro_after_init;

static DEFINE_PER_CPU(bool, tdx_lp_initialized);

static struct tdmr_info_list tdx_tdmr_list;

static enum tdx_module_status_t tdx_module_status;
static DEFINE_MUTEX(tdx_module_lock);

/* All TDX-usable memory regions.  Protected by mem_hotplug_lock. */
static LIST_HEAD(tdx_memlist);

typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);

static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args)
{
	pr_err("SEAMCALL (0x%016llx) failed: 0x%016llx\n", fn, err);
}

static inline void seamcall_err_ret(u64 fn, u64 err,
				    struct tdx_module_args *args)
{
	seamcall_err(fn, err, args);
	pr_err("RCX 0x%016llx RDX 0x%016llx R08 0x%016llx\n",
			args->rcx, args->rdx, args->r8);
	pr_err("R09 0x%016llx R10 0x%016llx R11 0x%016llx\n",
			args->r9, args->r10, args->r11);
}

static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func,
				 u64 fn, struct tdx_module_args *args)
{
	u64 sret = sc_retry(func, fn, args);

	if (sret == TDX_SUCCESS)
		return 0;

	if (sret == TDX_SEAMCALL_VMFAILINVALID)
		return -ENODEV;

	if (sret == TDX_SEAMCALL_GP)
		return -EOPNOTSUPP;

	if (sret == TDX_SEAMCALL_UD)
		return -EACCES;

	err_func(fn, sret, args);
	return -EIO;
}

#define seamcall_prerr(__fn, __args)					\
	sc_retry_prerr(__seamcall, seamcall_err, (__fn), (__args))

#define seamcall_prerr_ret(__fn, __args)				\
	sc_retry_prerr(__seamcall_ret, seamcall_err_ret, (__fn), (__args))

/*
 * Do the module global initialization once and return its result.
 * It can be done on any cpu.  It's always called with interrupts
 * disabled.
 */
static int try_init_module_global(void)
{
	struct tdx_module_args args = {};
	static DEFINE_RAW_SPINLOCK(sysinit_lock);
	static bool sysinit_done;
	static int sysinit_ret;

	lockdep_assert_irqs_disabled();

	raw_spin_lock(&sysinit_lock);

	if (sysinit_done)
		goto out;

	/* RCX is module attributes and all bits are reserved */
	args.rcx = 0;
	sysinit_ret = seamcall_prerr(TDH_SYS_INIT, &args);

	/*
	 * The first SEAMCALL also detects the TDX module, thus
	 * it can fail because the TDX module is not loaded.
	 * Dump a message to let the user know.
	 */
	if (sysinit_ret == -ENODEV)
		pr_err("module not loaded\n");

	sysinit_done = true;
out:
	raw_spin_unlock(&sysinit_lock);
	return sysinit_ret;
}

/**
 * tdx_cpu_enable - Enable TDX on local cpu
 *
 * Do one-time TDX module per-cpu initialization SEAMCALL (and TDX module
 * global initialization SEAMCALL if not done) on local cpu to make this
 * cpu be ready to run any other SEAMCALLs.
 *
 * Always call this function via IPI function calls.
 *
 * Return 0 on success, otherwise errors.
 */
int tdx_cpu_enable(void)
{
	struct tdx_module_args args = {};
	int ret;

	if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
		return -ENODEV;

	lockdep_assert_irqs_disabled();

	if (__this_cpu_read(tdx_lp_initialized))
		return 0;

	/*
	 * The TDX module global initialization is the very first step
	 * to enable TDX.  Need to do it first (if it hasn't been done)
	 * before the per-cpu initialization.
	 */
	ret = try_init_module_global();
	if (ret)
		return ret;

	ret = seamcall_prerr(TDH_SYS_LP_INIT, &args);
	if (ret)
		return ret;

	__this_cpu_write(tdx_lp_initialized, true);

	return 0;
}
EXPORT_SYMBOL_GPL(tdx_cpu_enable);
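/*
 * Illustrative sketch (not part of this file, caller names are
 * hypothetical): a caller such as KVM is expected to invoke
 * tdx_cpu_enable() from IPI context on each online cpu, after that
 * cpu has been put into VMX operation, e.g.:
 *
 *	static void enable_tdx_on_cpu(void *failed)
 *	{
 *		// VMXON must already have been done by the caller.
 *		if (tdx_cpu_enable())
 *			atomic_inc((atomic_t *)failed);
 *	}
 *
 *	atomic_t failed = ATOMIC_INIT(0);
 *	on_each_cpu(enable_tdx_on_cpu, &failed, 1);
 *
 * on_each_cpu() runs the function on every online cpu with interrupts
 * disabled, which satisfies the lockdep_assert_irqs_disabled() above.
 */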
/*
 * Add a memory region as a TDX memory block.  The caller must make sure
 * all memory regions are added in address ascending order and don't
 * overlap.
 */
static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn,
			    unsigned long end_pfn)
{
	struct tdx_memblock *tmb;

	tmb = kmalloc(sizeof(*tmb), GFP_KERNEL);
	if (!tmb)
		return -ENOMEM;

	INIT_LIST_HEAD(&tmb->list);
	tmb->start_pfn = start_pfn;
	tmb->end_pfn = end_pfn;

	/* @tmb_list is protected by mem_hotplug_lock */
	list_add_tail(&tmb->list, tmb_list);
	return 0;
}

static void free_tdx_memlist(struct list_head *tmb_list)
{
	/* @tmb_list is protected by mem_hotplug_lock */
	while (!list_empty(tmb_list)) {
		struct tdx_memblock *tmb = list_first_entry(tmb_list,
				struct tdx_memblock, list);

		list_del(&tmb->list);
		kfree(tmb);
	}
}

/*
 * Ensure that all memblock memory regions are convertible to TDX
 * memory.  Once this has been established, stash the memblock
 * ranges off in a secondary structure because memblock is modified
 * in memory hotplug while TDX memory regions are fixed.
 */
static int build_tdx_memlist(struct list_head *tmb_list)
{
	unsigned long start_pfn, end_pfn;
	int i, ret;

	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
		/*
		 * The first 1MB is not reported as TDX convertible memory.
		 * Although the first 1MB is always reserved and won't end
		 * up in the page allocator, it is still in memblock's
		 * memory regions.  Skip it manually to exclude it as TDX
		 * memory.
		 */
		start_pfn = max(start_pfn, PHYS_PFN(SZ_1M));
		if (start_pfn >= end_pfn)
			continue;

		/*
		 * Add the memory regions as TDX memory.  The regions in
		 * memblock are already guaranteed to be in address
		 * ascending order and to not overlap.
		 */
		ret = add_tdx_memblock(tmb_list, start_pfn, end_pfn);
		if (ret)
			goto err;
	}

	return 0;
err:
	free_tdx_memlist(tmb_list);
	return ret;
}

static int read_sys_metadata_field(u64 field_id, u64 *data)
{
	struct tdx_module_args args = {};
	int ret;

	/*
	 * TDH.SYS.RD -- reads one global metadata field
	 *  - RDX (in): the field to read
	 *  - R8 (out): the field data
	 */
	args.rdx = field_id;
	ret = seamcall_prerr_ret(TDH_SYS_RD, &args);
	if (ret)
		return ret;

	*data = args.r8;

	return 0;
}

static int read_sys_metadata_field16(u64 field_id, int offset,
				     struct tdx_tdmr_sysinfo *ts)
{
	u16 *ts_member = ((void *)ts) + offset;
	u64 tmp;
	int ret;

	if (WARN_ON_ONCE(MD_FIELD_ID_ELE_SIZE_CODE(field_id) !=
			MD_FIELD_ID_ELE_SIZE_16BIT))
		return -EINVAL;

	ret = read_sys_metadata_field(field_id, &tmp);
	if (ret)
		return ret;

	*ts_member = tmp;

	return 0;
}

struct field_mapping {
	u64 field_id;
	int offset;
};

#define TD_SYSINFO_MAP(_field_id, _offset)				\
	{ .field_id = MD_FIELD_ID_##_field_id,				\
	  .offset   = offsetof(struct tdx_tdmr_sysinfo, _offset) }

/* Map TD_SYSINFO fields into 'struct tdx_tdmr_sysinfo': */
static const struct field_mapping fields[] = {
	TD_SYSINFO_MAP(MAX_TDMRS,	      max_tdmrs),
	TD_SYSINFO_MAP(MAX_RESERVED_PER_TDMR, max_reserved_per_tdmr),
	TD_SYSINFO_MAP(PAMT_4K_ENTRY_SIZE,    pamt_entry_size[TDX_PS_4K]),
	TD_SYSINFO_MAP(PAMT_2M_ENTRY_SIZE,    pamt_entry_size[TDX_PS_2M]),
	TD_SYSINFO_MAP(PAMT_1G_ENTRY_SIZE,    pamt_entry_size[TDX_PS_1G]),
};

static int get_tdx_tdmr_sysinfo(struct tdx_tdmr_sysinfo *tdmr_sysinfo)
{
	int ret;
	int i;

	/* Populate 'tdmr_sysinfo' fields using the mapping structure above: */
	for (i = 0; i < ARRAY_SIZE(fields); i++) {
		ret = read_sys_metadata_field16(fields[i].field_id,
						fields[i].offset,
						tdmr_sysinfo);
		if (ret)
			return ret;
	}

	return 0;
}

/* Calculate the actual TDMR size */
static int tdmr_size_single(u16 max_reserved_per_tdmr)
{
	int tdmr_sz;

	/*
	 * The actual size of TDMR depends on the maximum
	 * number of reserved areas.
	 */
	tdmr_sz = sizeof(struct tdmr_info);
	tdmr_sz += sizeof(struct tdmr_reserved_area) * max_reserved_per_tdmr;

	return ALIGN(tdmr_sz, TDMR_INFO_ALIGNMENT);
}

static int alloc_tdmr_list(struct tdmr_info_list *tdmr_list,
			   struct tdx_tdmr_sysinfo *tdmr_sysinfo)
{
	size_t tdmr_sz, tdmr_array_sz;
	void *tdmr_array;

	tdmr_sz = tdmr_size_single(tdmr_sysinfo->max_reserved_per_tdmr);
	tdmr_array_sz = tdmr_sz * tdmr_sysinfo->max_tdmrs;

	/*
	 * To keep things simple, allocate all TDMRs together.
	 * The buffer needs to be physically contiguous to make
	 * sure each TDMR is physically contiguous.
	 */
	tdmr_array = alloc_pages_exact(tdmr_array_sz,
			GFP_KERNEL | __GFP_ZERO);
	if (!tdmr_array)
		return -ENOMEM;

	tdmr_list->tdmrs = tdmr_array;

	/*
	 * Keep the size of TDMR to find the target TDMR
	 * at a given index in the TDMR list.
	 */
	tdmr_list->tdmr_sz = tdmr_sz;
	tdmr_list->max_tdmrs = tdmr_sysinfo->max_tdmrs;
	tdmr_list->nr_consumed_tdmrs = 0;

	return 0;
}

static void free_tdmr_list(struct tdmr_info_list *tdmr_list)
{
	free_pages_exact(tdmr_list->tdmrs,
			tdmr_list->max_tdmrs * tdmr_list->tdmr_sz);
}

/*
 * Construct a list of TDMRs on the preallocated space in @tdmr_list
 * to cover all TDX memory regions in @tmb_list based on the TDX module
 * TDMR global information in @tdmr_sysinfo.
 */
static int construct_tdmrs(struct list_head *tmb_list,
			   struct tdmr_info_list *tdmr_list,
			   struct tdx_tdmr_sysinfo *tdmr_sysinfo)
{
	/*
	 * TODO:
	 *
	 *  - Fill out TDMRs to cover all TDX memory regions.
	 *  - Allocate and set up PAMTs for each TDMR.
	 *  - Designate reserved areas for each TDMR.
	 *
	 * Return -EINVAL until constructing TDMRs is done.
	 */
	return -EINVAL;
}
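/*
 * Illustrative sketch (no such helper is defined in this file): given
 * the bookkeeping done in alloc_tdmr_list(), the TDMR at a given index
 * in the flat, physically contiguous TDMR array would be located as:
 *
 *	struct tdmr_info *tdmr = (void *)tdmr_list->tdmrs +
 *				 idx * tdmr_list->tdmr_sz;
 *
 * because each entry occupies tdmr_sz bytes (TDMR_INFO_ALIGNMENT
 * aligned, including the reserved areas), not sizeof(struct tdmr_info).
 */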
static int init_tdx_module(void)
{
	struct tdx_tdmr_sysinfo tdmr_sysinfo;
	int ret;

	/*
	 * To keep things simple, assume that all TDX-protected memory
	 * will come from the page allocator.  Make sure all pages in the
	 * page allocator are TDX-usable memory.
	 *
	 * Build the list of "TDX-usable" memory regions which cover all
	 * pages in the page allocator to guarantee that.  Do it while
	 * holding mem_hotplug_lock read-lock as the memory hotplug code
	 * path reads the @tdx_memlist to reject any new memory.
	 */
	get_online_mems();

	ret = build_tdx_memlist(&tdx_memlist);
	if (ret)
		goto out_put_tdxmem;

	ret = get_tdx_tdmr_sysinfo(&tdmr_sysinfo);
	if (ret)
		goto err_free_tdxmem;

	/* Allocate enough space for constructing TDMRs */
	ret = alloc_tdmr_list(&tdx_tdmr_list, &tdmr_sysinfo);
	if (ret)
		goto err_free_tdxmem;

	/* Cover all TDX-usable memory regions in TDMRs */
	ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &tdmr_sysinfo);
	if (ret)
		goto err_free_tdmrs;

	/*
	 * TODO:
	 *
	 *  - Configure the TDMRs and the global KeyID to the TDX module.
	 *  - Configure the global KeyID on all packages.
	 *  - Initialize all TDMRs.
	 *
	 * Return error before all steps are done.
	 */
	ret = -EINVAL;
	if (ret)
		goto err_free_tdmrs;
out_put_tdxmem:
	/*
	 * @tdx_memlist is written here and read at memory hotplug time.
	 * Lock out memory hotplug code while building it.
	 */
	put_online_mems();
	return ret;

err_free_tdmrs:
	free_tdmr_list(&tdx_tdmr_list);
err_free_tdxmem:
	free_tdx_memlist(&tdx_memlist);
	goto out_put_tdxmem;
}

static int __tdx_enable(void)
{
	int ret;

	ret = init_tdx_module();
	if (ret) {
		pr_err("module initialization failed (%d)\n", ret);
		tdx_module_status = TDX_MODULE_ERROR;
		return ret;
	}

	pr_info("module initialized\n");
	tdx_module_status = TDX_MODULE_INITIALIZED;

	return 0;
}

/**
 * tdx_enable - Enable TDX module to make it ready to run TDX guests
 *
 * This function assumes the caller has: 1) held read lock of CPU hotplug
 * lock to prevent any new cpu from becoming online; 2) done both VMXON
 * and tdx_cpu_enable() on all online cpus.
 *
 * This function can be called in parallel by multiple callers.
 *
 * Return 0 if TDX is enabled successfully, otherwise error.
 */
int tdx_enable(void)
{
	int ret;

	if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
		return -ENODEV;

	lockdep_assert_cpus_held();

	mutex_lock(&tdx_module_lock);

	switch (tdx_module_status) {
	case TDX_MODULE_UNINITIALIZED:
		ret = __tdx_enable();
		break;
	case TDX_MODULE_INITIALIZED:
		/* Already initialized, great, tell the caller. */
		ret = 0;
		break;
	default:
		/* Failed to initialize in the previous attempts */
		ret = -EINVAL;
		break;
	}

	mutex_unlock(&tdx_module_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(tdx_enable);
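/*
 * Illustrative sketch (hypothetical caller, not part of this file) of
 * the calling convention described above: hold the CPU hotplug read
 * lock, do VMXON plus tdx_cpu_enable() on all online cpus, then call
 * tdx_enable():
 *
 *	cpus_read_lock();
 *	// VMXON and tdx_cpu_enable() on each online cpu, e.g. via
 *	// on_each_cpu(), is the caller's (such as KVM's) responsibility.
 *	ret = tdx_enable();
 *	cpus_read_unlock();
 */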
static __init int record_keyid_partitioning(u32 *tdx_keyid_start,
					    u32 *nr_tdx_keyids)
{
	u32 _nr_mktme_keyids, _tdx_keyid_start, _nr_tdx_keyids;
	int ret;

	/*
	 * IA32_MKTME_KEYID_PARTITIONING:
	 *   Bit [31:0]:	Number of MKTME KeyIDs.
	 *   Bit [63:32]:	Number of TDX private KeyIDs.
	 */
	ret = rdmsr_safe(MSR_IA32_MKTME_KEYID_PARTITIONING, &_nr_mktme_keyids,
			&_nr_tdx_keyids);
	if (ret || !_nr_tdx_keyids)
		return -EINVAL;

	/* TDX KeyIDs start after the last MKTME KeyID. */
	_tdx_keyid_start = _nr_mktme_keyids + 1;

	*tdx_keyid_start = _tdx_keyid_start;
	*nr_tdx_keyids = _nr_tdx_keyids;

	return 0;
}

static bool is_tdx_memory(unsigned long start_pfn, unsigned long end_pfn)
{
	struct tdx_memblock *tmb;

	/*
	 * This check assumes that the start_pfn<->end_pfn range does not
	 * cross multiple @tdx_memlist entries.  A single memory online
	 * event across multiple memblocks (from which @tdx_memlist
	 * entries are derived at the time of module initialization) is
	 * not possible.  This is because memory offline/online is done
	 * on the granularity of a 'struct memory_block', and the
	 * hotpluggable memory region (one memblock) must be a multiple
	 * of the memory_block size.
	 */
	list_for_each_entry(tmb, &tdx_memlist, list) {
		if (start_pfn >= tmb->start_pfn && end_pfn <= tmb->end_pfn)
			return true;
	}
	return false;
}

static int tdx_memory_notifier(struct notifier_block *nb, unsigned long action,
			       void *v)
{
	struct memory_notify *mn = v;

	if (action != MEM_GOING_ONLINE)
		return NOTIFY_OK;

	/*
	 * Empty list means TDX isn't enabled.  Allow any memory
	 * to go online.
	 */
	if (list_empty(&tdx_memlist))
		return NOTIFY_OK;

	/*
	 * The TDX memory configuration is static and cannot be
	 * changed.  Reject onlining any memory which is outside of
	 * the static configuration, whether it supports TDX or not.
	 */
	if (is_tdx_memory(mn->start_pfn, mn->start_pfn + mn->nr_pages))
		return NOTIFY_OK;

	return NOTIFY_BAD;
}

static struct notifier_block tdx_memory_nb = {
	.notifier_call = tdx_memory_notifier,
};

void __init tdx_init(void)
{
	u32 tdx_keyid_start, nr_tdx_keyids;
	int err;

	err = record_keyid_partitioning(&tdx_keyid_start, &nr_tdx_keyids);
	if (err)
		return;

	pr_info("BIOS enabled: private KeyID range [%u, %u)\n",
			tdx_keyid_start, tdx_keyid_start + nr_tdx_keyids);

	/*
	 * The TDX module itself requires one 'global KeyID' to protect
	 * its metadata.  If there's only one TDX KeyID, there won't be
	 * any left for TDX guests, thus there's no point in enabling TDX
	 * at all.
	 */
	if (nr_tdx_keyids < 2) {
		pr_err("initialization failed: too few private KeyIDs available.\n");
		return;
	}

	err = register_memory_notifier(&tdx_memory_nb);
	if (err) {
		pr_err("initialization failed: register_memory_notifier() failed (%d)\n",
				err);
		return;
	}

	/*
	 * Just use the first TDX KeyID as the 'global KeyID' and
	 * leave the rest for TDX guests.
	 */
	tdx_global_keyid = tdx_keyid_start;
	tdx_guest_keyid_start = tdx_keyid_start + 1;
	tdx_nr_guest_keyids = nr_tdx_keyids - 1;

	setup_force_cpu_cap(X86_FEATURE_TDX_HOST_PLATFORM);
}
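/*
 * Worked example of the KeyID partitioning above (the numbers are made
 * up for illustration): if IA32_MKTME_KEYID_PARTITIONING reports 3 MKTME
 * KeyIDs and 4 TDX private KeyIDs, the TDX KeyID range is [4, 8).
 * tdx_init() takes KeyID 4 as the global KeyID, so tdx_guest_keyid_start
 * is 5 and tdx_nr_guest_keyids is 3.
 */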