Commit 0afe832e authored by Linus Torvalds

Merge branch 'x86-cleanups-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cleanups from Ingo Molnar:
 "Misc cleanups"

* 'x86-cleanups-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/apm: Fix spelling mistake: "caculate" -> "calculate"
  x86/mtrr: Rename main.c to mtrr.c and remove duplicate prefixes
  x86: Remove pr_fmt duplicate logging prefixes
  x86/early-quirks: Rename duplicate define of dev_err
  x86/bpf: Clean up non-standard comments, to make the code more readable
parents 42964c6f 844ea8f6
...@@ -889,7 +889,7 @@ static void force_ibs_eilvt_setup(void) ...@@ -889,7 +889,7 @@ static void force_ibs_eilvt_setup(void)
if (!ibs_eilvt_valid()) if (!ibs_eilvt_valid())
goto out; goto out;
pr_info("IBS: LVT offset %d assigned\n", offset); pr_info("LVT offset %d assigned\n", offset);
return; return;
out: out:
......
...@@ -2433,7 +2433,7 @@ MODULE_PARM_DESC(idle_threshold, ...@@ -2433,7 +2433,7 @@ MODULE_PARM_DESC(idle_threshold,
"System idle percentage above which to make APM BIOS idle calls"); "System idle percentage above which to make APM BIOS idle calls");
module_param(idle_period, int, 0444); module_param(idle_period, int, 0444);
MODULE_PARM_DESC(idle_period, MODULE_PARM_DESC(idle_period,
"Period (in sec/100) over which to caculate the idle percentage"); "Period (in sec/100) over which to calculate the idle percentage");
module_param(smp, bool, 0444); module_param(smp, bool, 0444);
MODULE_PARM_DESC(smp, MODULE_PARM_DESC(smp,
"Set this to enable APM use on an SMP platform. Use with caution on older systems"); "Set this to enable APM use on an SMP platform. Use with caution on older systems");
......
obj-y := main.o if.o generic.o cleanup.o obj-y := mtrr.o if.o generic.o cleanup.o
obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
...@@ -101,7 +101,7 @@ static int have_wrcomb(void) ...@@ -101,7 +101,7 @@ static int have_wrcomb(void)
if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
dev->device == PCI_DEVICE_ID_SERVERWORKS_LE && dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
dev->revision <= 5) { dev->revision <= 5) {
pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
pci_dev_put(dev); pci_dev_put(dev);
return 0; return 0;
} }
...@@ -111,7 +111,7 @@ static int have_wrcomb(void) ...@@ -111,7 +111,7 @@ static int have_wrcomb(void)
*/ */
if (dev->vendor == PCI_VENDOR_ID_INTEL && if (dev->vendor == PCI_VENDOR_ID_INTEL &&
dev->device == PCI_DEVICE_ID_INTEL_82451NX) { dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
pci_dev_put(dev); pci_dev_put(dev);
return 0; return 0;
} }
...@@ -313,24 +313,24 @@ int mtrr_add_page(unsigned long base, unsigned long size, ...@@ -313,24 +313,24 @@ int mtrr_add_page(unsigned long base, unsigned long size,
return error; return error;
if (type >= MTRR_NUM_TYPES) { if (type >= MTRR_NUM_TYPES) {
pr_warn("mtrr: type: %u invalid\n", type); pr_warn("type: %u invalid\n", type);
return -EINVAL; return -EINVAL;
} }
/* If the type is WC, check that this processor supports it */ /* If the type is WC, check that this processor supports it */
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
pr_warn("mtrr: your processor doesn't support write-combining\n"); pr_warn("your processor doesn't support write-combining\n");
return -ENOSYS; return -ENOSYS;
} }
if (!size) { if (!size) {
pr_warn("mtrr: zero sized request\n"); pr_warn("zero sized request\n");
return -EINVAL; return -EINVAL;
} }
if ((base | (base + size - 1)) >> if ((base | (base + size - 1)) >>
(boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) { (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
pr_warn("mtrr: base or size exceeds the MTRR width\n"); pr_warn("base or size exceeds the MTRR width\n");
return -EINVAL; return -EINVAL;
} }
...@@ -361,8 +361,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, ...@@ -361,8 +361,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
} else if (types_compatible(type, ltype)) } else if (types_compatible(type, ltype))
continue; continue;
} }
pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing" pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n", base, size, lbase,
" 0x%lx000,0x%lx000\n", base, size, lbase,
lsize); lsize);
goto out; goto out;
} }
...@@ -370,7 +369,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, ...@@ -370,7 +369,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
if (ltype != type) { if (ltype != type) {
if (types_compatible(type, ltype)) if (types_compatible(type, ltype))
continue; continue;
pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
base, size, mtrr_attrib_to_str(ltype), base, size, mtrr_attrib_to_str(ltype),
mtrr_attrib_to_str(type)); mtrr_attrib_to_str(type));
goto out; goto out;
...@@ -396,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, ...@@ -396,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
} }
} }
} else { } else {
pr_info("mtrr: no more MTRRs available\n"); pr_info("no more MTRRs available\n");
} }
error = i; error = i;
out: out:
...@@ -408,8 +407,8 @@ int mtrr_add_page(unsigned long base, unsigned long size, ...@@ -408,8 +407,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
static int mtrr_check(unsigned long base, unsigned long size) static int mtrr_check(unsigned long base, unsigned long size)
{ {
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
pr_warn("mtrr: size and base must be multiples of 4 kiB\n"); pr_warn("size and base must be multiples of 4 kiB\n");
pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base); pr_debug("size: 0x%lx base: 0x%lx\n", size, base);
dump_stack(); dump_stack();
return -1; return -1;
} }
...@@ -500,22 +499,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) ...@@ -500,22 +499,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
} }
} }
if (reg < 0) { if (reg < 0) {
pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n", pr_debug("no MTRR for %lx000,%lx000 found\n",
base, size); base, size);
goto out; goto out;
} }
} }
if (reg >= max) { if (reg >= max) {
pr_warn("mtrr: register: %d too big\n", reg); pr_warn("register: %d too big\n", reg);
goto out; goto out;
} }
mtrr_if->get(reg, &lbase, &lsize, &ltype); mtrr_if->get(reg, &lbase, &lsize, &ltype);
if (lsize < 1) { if (lsize < 1) {
pr_warn("mtrr: MTRR %d not used\n", reg); pr_warn("MTRR %d not used\n", reg);
goto out; goto out;
} }
if (mtrr_usage_table[reg] < 1) { if (mtrr_usage_table[reg] < 1) {
pr_warn("mtrr: reg: %d has count=0\n", reg); pr_warn("reg: %d has count=0\n", reg);
goto out; goto out;
} }
if (--mtrr_usage_table[reg] < 1) if (--mtrr_usage_table[reg] < 1)
...@@ -776,7 +775,7 @@ void __init mtrr_bp_init(void) ...@@ -776,7 +775,7 @@ void __init mtrr_bp_init(void)
} }
if (!mtrr_enabled()) { if (!mtrr_enabled()) {
pr_info("MTRR: Disabled\n"); pr_info("Disabled\n");
/* /*
* PAT initialization relies on MTRR's rendezvous handler. * PAT initialization relies on MTRR's rendezvous handler.
......
...@@ -155,7 +155,8 @@ static void __init __e820__range_add(struct e820_table *table, u64 start, u64 si ...@@ -155,7 +155,8 @@ static void __init __e820__range_add(struct e820_table *table, u64 start, u64 si
int x = table->nr_entries; int x = table->nr_entries;
if (x >= ARRAY_SIZE(table->entries)) { if (x >= ARRAY_SIZE(table->entries)) {
pr_err("e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", start, start + size - 1); pr_err("too many entries; ignoring [mem %#010llx-%#010llx]\n",
start, start + size - 1);
return; return;
} }
...@@ -190,7 +191,8 @@ void __init e820__print_table(char *who) ...@@ -190,7 +191,8 @@ void __init e820__print_table(char *who)
int i; int i;
for (i = 0; i < e820_table->nr_entries; i++) { for (i = 0; i < e820_table->nr_entries; i++) {
pr_info("%s: [mem %#018Lx-%#018Lx] ", who, pr_info("%s: [mem %#018Lx-%#018Lx] ",
who,
e820_table->entries[i].addr, e820_table->entries[i].addr,
e820_table->entries[i].addr + e820_table->entries[i].size - 1); e820_table->entries[i].addr + e820_table->entries[i].size - 1);
...@@ -574,7 +576,7 @@ void __init e820__update_table_print(void) ...@@ -574,7 +576,7 @@ void __init e820__update_table_print(void)
if (e820__update_table(e820_table)) if (e820__update_table(e820_table))
return; return;
pr_info("e820: modified physical RAM map:\n"); pr_info("modified physical RAM map:\n");
e820__print_table("modified"); e820__print_table("modified");
} }
...@@ -636,9 +638,8 @@ __init void e820__setup_pci_gap(void) ...@@ -636,9 +638,8 @@ __init void e820__setup_pci_gap(void)
if (!found) { if (!found) {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
pr_err( pr_err("Cannot find an available gap in the 32-bit address range\n");
"e820: Cannot find an available gap in the 32-bit address range\n" pr_err("PCI devices with unassigned 32-bit BARs may not work!\n");
"e820: PCI devices with unassigned 32-bit BARs may not work!\n");
#else #else
gapstart = 0x10000000; gapstart = 0x10000000;
#endif #endif
...@@ -649,7 +650,8 @@ __init void e820__setup_pci_gap(void) ...@@ -649,7 +650,8 @@ __init void e820__setup_pci_gap(void)
*/ */
pci_mem_start = gapstart; pci_mem_start = gapstart;
pr_info("e820: [mem %#010lx-%#010lx] available for PCI devices\n", gapstart, gapstart + gapsize - 1); pr_info("[mem %#010lx-%#010lx] available for PCI devices\n",
gapstart, gapstart + gapsize - 1);
} }
/* /*
...@@ -711,7 +713,7 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len) ...@@ -711,7 +713,7 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
early_memunmap(sdata, data_len); early_memunmap(sdata, data_len);
pr_info("e820: extended physical RAM map:\n"); pr_info("extended physical RAM map:\n");
e820__print_table("extended"); e820__print_table("extended");
} }
...@@ -780,7 +782,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align) ...@@ -780,7 +782,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
if (addr) { if (addr) {
e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
pr_info("e820: update e820_table_kexec for e820__memblock_alloc_reserved()\n"); pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
e820__update_table_kexec(); e820__update_table_kexec();
} }
...@@ -830,7 +832,7 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type ...@@ -830,7 +832,7 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type
if (last_pfn > max_arch_pfn) if (last_pfn > max_arch_pfn)
last_pfn = max_arch_pfn; last_pfn = max_arch_pfn;
pr_info("e820: last_pfn = %#lx max_arch_pfn = %#lx\n", pr_info("last_pfn = %#lx max_arch_pfn = %#lx\n",
last_pfn, max_arch_pfn); last_pfn, max_arch_pfn);
return last_pfn; return last_pfn;
} }
...@@ -1005,7 +1007,7 @@ void __init e820__finish_early_params(void) ...@@ -1005,7 +1007,7 @@ void __init e820__finish_early_params(void)
if (e820__update_table(e820_table) < 0) if (e820__update_table(e820_table) < 0)
early_panic("Invalid user supplied memory map"); early_panic("Invalid user supplied memory map");
pr_info("e820: user-defined physical RAM map:\n"); pr_info("user-defined physical RAM map:\n");
e820__print_table("user"); e820__print_table("user");
} }
} }
...@@ -1238,7 +1240,7 @@ void __init e820__memory_setup(void) ...@@ -1238,7 +1240,7 @@ void __init e820__memory_setup(void)
memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec)); memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
pr_info("e820: BIOS-provided physical RAM map:\n"); pr_info("BIOS-provided physical RAM map:\n");
e820__print_table(who); e820__print_table(who);
} }
......
...@@ -28,8 +28,6 @@ ...@@ -28,8 +28,6 @@
#include <asm/irq_remapping.h> #include <asm/irq_remapping.h>
#include <asm/early_ioremap.h> #include <asm/early_ioremap.h>
#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg)
static void __init fix_hypertransport_config(int num, int slot, int func) static void __init fix_hypertransport_config(int num, int slot, int func)
{ {
u32 htcfg; u32 htcfg;
...@@ -617,7 +615,8 @@ static void __init apple_airport_reset(int bus, int slot, int func) ...@@ -617,7 +615,8 @@ static void __init apple_airport_reset(int bus, int slot, int func)
pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL);
if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) {
dev_err("Cannot power up Apple AirPort card\n"); pr_err("pci 0000:%02x:%02x.%d: Cannot power up Apple AirPort card\n",
bus, slot, func);
return; return;
} }
} }
...@@ -628,7 +627,8 @@ static void __init apple_airport_reset(int bus, int slot, int func) ...@@ -628,7 +627,8 @@ static void __init apple_airport_reset(int bus, int slot, int func)
mmio = early_ioremap(addr, BCM4331_MMIO_SIZE); mmio = early_ioremap(addr, BCM4331_MMIO_SIZE);
if (!mmio) { if (!mmio) {
dev_err("Cannot iomap Apple AirPort card\n"); pr_err("pci 0000:%02x:%02x.%d: Cannot iomap Apple AirPort card\n",
bus, slot, func);
return; return;
} }
......
...@@ -975,8 +975,7 @@ int __init hpet_enable(void) ...@@ -975,8 +975,7 @@ int __init hpet_enable(void)
cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
hpet_writel(cfg, HPET_CFG); hpet_writel(cfg, HPET_CFG);
if (cfg) if (cfg)
pr_warn("HPET: Unrecognized bits %#x set in global cfg\n", pr_warn("Unrecognized bits %#x set in global cfg\n", cfg);
cfg);
for (i = 0; i <= last; ++i) { for (i = 0; i <= last; ++i) {
cfg = hpet_readl(HPET_Tn_CFG(i)); cfg = hpet_readl(HPET_Tn_CFG(i));
...@@ -988,7 +987,7 @@ int __init hpet_enable(void) ...@@ -988,7 +987,7 @@ int __init hpet_enable(void)
| HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE | HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE
| HPET_TN_FSB | HPET_TN_FSB_CAP); | HPET_TN_FSB | HPET_TN_FSB_CAP);
if (cfg) if (cfg)
pr_warn("HPET: Unrecognized bits %#x set in cfg#%u\n", pr_warn("Unrecognized bits %#x set in cfg#%u\n",
cfg, i); cfg, i);
} }
hpet_print_config(); hpet_print_config();
......
...@@ -1083,8 +1083,8 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs ...@@ -1083,8 +1083,8 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
return orig_ret_vaddr; return orig_ret_vaddr;
if (nleft != rasize) { if (nleft != rasize) {
pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, " pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n",
"%%ip=%#lx\n", current->pid, regs->sp, regs->ip); current->pid, regs->sp, regs->ip);
force_sig_info(SIGSEGV, SEND_SIG_FORCED, current); force_sig_info(SIGSEGV, SEND_SIG_FORCED, current);
} }
......
...@@ -136,13 +136,13 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end, ...@@ -136,13 +136,13 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
/* whine about and ignore invalid blks */ /* whine about and ignore invalid blks */
if (start > end || nid < 0 || nid >= MAX_NUMNODES) { if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
nid, start, end - 1); nid, start, end - 1);
return 0; return 0;
} }
if (mi->nr_blks >= NR_NODE_MEMBLKS) { if (mi->nr_blks >= NR_NODE_MEMBLKS) {
pr_err("NUMA: too many memblk ranges\n"); pr_err("too many memblk ranges\n");
return -EINVAL; return -EINVAL;
} }
...@@ -267,12 +267,12 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) ...@@ -267,12 +267,12 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
*/ */
if (bi->end > bj->start && bi->start < bj->end) { if (bi->end > bj->start && bi->start < bj->end) {
if (bi->nid != bj->nid) { if (bi->nid != bj->nid) {
pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n", pr_err("node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
bi->nid, bi->start, bi->end - 1, bi->nid, bi->start, bi->end - 1,
bj->nid, bj->start, bj->end - 1); bj->nid, bj->start, bj->end - 1);
return -EINVAL; return -EINVAL;
} }
pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n", pr_warn("Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
bi->nid, bi->start, bi->end - 1, bi->nid, bi->start, bi->end - 1,
bj->start, bj->end - 1); bj->start, bj->end - 1);
} }
...@@ -364,7 +364,7 @@ static int __init numa_alloc_distance(void) ...@@ -364,7 +364,7 @@ static int __init numa_alloc_distance(void)
phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
size, PAGE_SIZE); size, PAGE_SIZE);
if (!phys) { if (!phys) {
pr_warning("NUMA: Warning: can't allocate distance table!\n"); pr_warn("Warning: can't allocate distance table!\n");
/* don't retry until explicitly reset */ /* don't retry until explicitly reset */
numa_distance = (void *)1LU; numa_distance = (void *)1LU;
return -ENOMEM; return -ENOMEM;
...@@ -410,14 +410,14 @@ void __init numa_set_distance(int from, int to, int distance) ...@@ -410,14 +410,14 @@ void __init numa_set_distance(int from, int to, int distance)
if (from >= numa_distance_cnt || to >= numa_distance_cnt || if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
from < 0 || to < 0) { from < 0 || to < 0) {
pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n", pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
from, to, distance); from, to, distance);
return; return;
} }
if ((u8)distance != distance || if ((u8)distance != distance ||
(from == to && distance != LOCAL_DISTANCE)) { (from == to && distance != LOCAL_DISTANCE)) {
pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
from, to, distance); from, to, distance);
return; return;
} }
......
/* bpf_jit_comp.c : BPF JIT compiler /*
* bpf_jit_comp.c: BPF JIT compiler
* *
* Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
* Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
...@@ -17,7 +18,7 @@ ...@@ -17,7 +18,7 @@
#include <asm/nospec-branch.h> #include <asm/nospec-branch.h>
/* /*
* assembly code in arch/x86/net/bpf_jit.S * Assembly code in arch/x86/net/bpf_jit.S
*/ */
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
...@@ -45,14 +46,15 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) ...@@ -45,14 +46,15 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
#define EMIT1_off32(b1, off) \ #define EMIT1_off32(b1, off) \
do {EMIT1(b1); EMIT(off, 4); } while (0) do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \ #define EMIT2_off32(b1, b2, off) \
do {EMIT2(b1, b2); EMIT(off, 4); } while (0) do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \ #define EMIT3_off32(b1, b2, b3, off) \
do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \ #define EMIT4_off32(b1, b2, b3, b4, off) \
do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
static bool is_imm8(int value) static bool is_imm8(int value)
{ {
...@@ -71,7 +73,8 @@ static bool is_uimm32(u64 value) ...@@ -71,7 +73,8 @@ static bool is_uimm32(u64 value)
/* mov dst, src */ /* mov dst, src */
#define EMIT_mov(DST, SRC) \ #define EMIT_mov(DST, SRC) \
do {if (DST != SRC) \ do { \
if (DST != SRC) \
EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
} while (0) } while (0)
...@@ -89,7 +92,8 @@ static int bpf_size_to_x86_bytes(int bpf_size) ...@@ -89,7 +92,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
return 0; return 0;
} }
/* list of x86 cond jumps opcodes (. + s8) /*
* List of x86 cond jumps opcodes (. + s8)
* Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
*/ */
#define X86_JB 0x72 #define X86_JB 0x72
...@@ -106,35 +110,37 @@ static int bpf_size_to_x86_bytes(int bpf_size) ...@@ -106,35 +110,37 @@ static int bpf_size_to_x86_bytes(int bpf_size)
#define CHOOSE_LOAD_FUNC(K, func) \ #define CHOOSE_LOAD_FUNC(K, func) \
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
/* pick a register outside of BPF range for JIT internal work */ /* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1) #define AUX_REG (MAX_BPF_JIT_REG + 1)
/* The following table maps BPF registers to x64 registers. /*
* The following table maps BPF registers to x86-64 registers.
* *
* x64 register r12 is unused, since if used as base address * x86-64 register R12 is unused, since if used as base address
* register in load/store instructions, it always needs an * register in load/store instructions, it always needs an
* extra byte of encoding and is callee saved. * extra byte of encoding and is callee saved.
* *
* r9 caches skb->len - skb->data_len * R9 caches skb->len - skb->data_len
* r10 caches skb->data, and used for blinding (if enabled) * R10 caches skb->data, and used for blinding (if enabled)
*/ */
static const int reg2hex[] = { static const int reg2hex[] = {
[BPF_REG_0] = 0, /* rax */ [BPF_REG_0] = 0, /* RAX */
[BPF_REG_1] = 7, /* rdi */ [BPF_REG_1] = 7, /* RDI */
[BPF_REG_2] = 6, /* rsi */ [BPF_REG_2] = 6, /* RSI */
[BPF_REG_3] = 2, /* rdx */ [BPF_REG_3] = 2, /* RDX */
[BPF_REG_4] = 1, /* rcx */ [BPF_REG_4] = 1, /* RCX */
[BPF_REG_5] = 0, /* r8 */ [BPF_REG_5] = 0, /* R8 */
[BPF_REG_6] = 3, /* rbx callee saved */ [BPF_REG_6] = 3, /* RBX callee saved */
[BPF_REG_7] = 5, /* r13 callee saved */ [BPF_REG_7] = 5, /* R13 callee saved */
[BPF_REG_8] = 6, /* r14 callee saved */ [BPF_REG_8] = 6, /* R14 callee saved */
[BPF_REG_9] = 7, /* r15 callee saved */ [BPF_REG_9] = 7, /* R15 callee saved */
[BPF_REG_FP] = 5, /* rbp readonly */ [BPF_REG_FP] = 5, /* RBP readonly */
[BPF_REG_AX] = 2, /* r10 temp register */ [BPF_REG_AX] = 2, /* R10 temp register */
[AUX_REG] = 3, /* r11 temp register */ [AUX_REG] = 3, /* R11 temp register */
}; };
/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15 /*
* is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
* which need extra byte of encoding. * which need extra byte of encoding.
* rax,rcx,...,rbp have simpler encoding * rax,rcx,...,rbp have simpler encoding
*/ */
...@@ -153,7 +159,7 @@ static bool is_axreg(u32 reg) ...@@ -153,7 +159,7 @@ static bool is_axreg(u32 reg)
return reg == BPF_REG_0; return reg == BPF_REG_0;
} }
/* add modifiers if 'reg' maps to x64 registers r8..r15 */ /* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
static u8 add_1mod(u8 byte, u32 reg) static u8 add_1mod(u8 byte, u32 reg)
{ {
if (is_ereg(reg)) if (is_ereg(reg))
...@@ -170,13 +176,13 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2) ...@@ -170,13 +176,13 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
return byte; return byte;
} }
/* encode 'dst_reg' register into x64 opcode 'byte' */ /* Encode 'dst_reg' register into x86-64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg) static u8 add_1reg(u8 byte, u32 dst_reg)
{ {
return byte + reg2hex[dst_reg]; return byte + reg2hex[dst_reg];
} }
/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */ /* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
{ {
return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
...@@ -184,27 +190,28 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) ...@@ -184,27 +190,28 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
static void jit_fill_hole(void *area, unsigned int size) static void jit_fill_hole(void *area, unsigned int size)
{ {
/* fill whole space with int3 instructions */ /* Fill whole space with INT3 instructions */
memset(area, 0xcc, size); memset(area, 0xcc, size);
} }
struct jit_context { struct jit_context {
int cleanup_addr; /* epilogue code offset */ int cleanup_addr; /* Epilogue code offset */
bool seen_ld_abs; bool seen_ld_abs;
bool seen_ax_reg; bool seen_ax_reg;
}; };
/* maximum number of bytes emitted while JITing one eBPF insn */ /* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE 128 #define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64 #define BPF_INSN_SAFETY 64
#define AUX_STACK_SPACE \ #define AUX_STACK_SPACE \
(32 /* space for rbx, r13, r14, r15 */ + \ (32 /* Space for RBX, R13, R14, R15 */ + \
8 /* space for skb_copy_bits() buffer */) 8 /* Space for skb_copy_bits() buffer */)
#define PROLOGUE_SIZE 37 #define PROLOGUE_SIZE 37
/* emit x64 prologue code for BPF program and check it's size. /*
* Emit x86-64 prologue code for BPF program and check its size.
* bpf_tail_call helper will skip it while jumping into another program * bpf_tail_call helper will skip it while jumping into another program
*/ */
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
...@@ -212,8 +219,11 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) ...@@ -212,8 +219,11 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
u8 *prog = *pprog; u8 *prog = *pprog;
int cnt = 0; int cnt = 0;
EMIT1(0x55); /* push rbp */ /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */ EMIT1(0x55);
/* mov rbp,rsp */
EMIT3(0x48, 0x89, 0xE5);
/* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
EMIT3_off32(0x48, 0x81, 0xEC, EMIT3_off32(0x48, 0x81, 0xEC,
...@@ -222,14 +232,15 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) ...@@ -222,14 +232,15 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
/* sub rbp, AUX_STACK_SPACE */ /* sub rbp, AUX_STACK_SPACE */
EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
/* all classic BPF filters use R6(rbx) save it */ /* All classic BPF filters use R6(rbx) save it */
/* mov qword ptr [rbp+0],rbx */ /* mov qword ptr [rbp+0],rbx */
EMIT4(0x48, 0x89, 0x5D, 0); EMIT4(0x48, 0x89, 0x5D, 0);
/* bpf_convert_filter() maps classic BPF register X to R7 and uses R8 /*
* as temporary, so all tcpdump filters need to spill/fill R7(r13) and * bpf_convert_filter() maps classic BPF register X to R7 and uses R8
* R8(r14). R9(r15) spill could be made conditional, but there is only * as temporary, so all tcpdump filters need to spill/fill R7(R13) and
* R8(R14). R9(R15) spill could be made conditional, but there is only
* one 'bpf_error' return path out of helper functions inside bpf_jit.S * one 'bpf_error' return path out of helper functions inside bpf_jit.S
* The overhead of extra spill is negligible for any filter other * The overhead of extra spill is negligible for any filter other
* than synthetic ones. Therefore not worth adding complexity. * than synthetic ones. Therefore not worth adding complexity.
...@@ -243,9 +254,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) ...@@ -243,9 +254,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
EMIT4(0x4C, 0x89, 0x7D, 24); EMIT4(0x4C, 0x89, 0x7D, 24);
if (!ebpf_from_cbpf) { if (!ebpf_from_cbpf) {
/* Clear the tail call counter (tail_call_cnt): for eBPF tail /*
* Clear the tail call counter (tail_call_cnt): for eBPF tail
* calls we need to reset the counter to 0. It's done in two * calls we need to reset the counter to 0. It's done in two
* instructions, resetting rax register to 0, and moving it * instructions, resetting RAX register to 0, and moving it
* to the counter location. * to the counter location.
*/ */
...@@ -260,7 +272,9 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) ...@@ -260,7 +272,9 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
*pprog = prog; *pprog = prog;
} }
/* generate the following code: /*
* Generate the following code:
*
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
* if (index >= array->map.max_entries) * if (index >= array->map.max_entries)
* goto out; * goto out;
...@@ -278,22 +292,25 @@ static void emit_bpf_tail_call(u8 **pprog) ...@@ -278,22 +292,25 @@ static void emit_bpf_tail_call(u8 **pprog)
int label1, label2, label3; int label1, label2, label3;
int cnt = 0; int cnt = 0;
/* rdi - pointer to ctx /*
* rdi - pointer to ctx
* rsi - pointer to bpf_array * rsi - pointer to bpf_array
* rdx - index in bpf_array * rdx - index in bpf_array
*/ */
/* if (index >= array->map.max_entries) /*
* if (index >= array->map.max_entries)
* goto out; * goto out;
*/ */
EMIT2(0x89, 0xD2); /* mov edx, edx */ EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries)); offsetof(struct bpf_array, map.max_entries));
#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ #define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */ EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt; label1 = cnt;
/* if (tail_call_cnt > MAX_TAIL_CALL_CNT) /*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out; * goto out;
*/ */
EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
...@@ -308,7 +325,8 @@ static void emit_bpf_tail_call(u8 **pprog) ...@@ -308,7 +325,8 @@ static void emit_bpf_tail_call(u8 **pprog)
EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
offsetof(struct bpf_array, ptrs)); offsetof(struct bpf_array, ptrs));
/* if (prog == NULL) /*
* if (prog == NULL)
* goto out; * goto out;
*/ */
EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
...@@ -321,7 +339,8 @@ static void emit_bpf_tail_call(u8 **pprog) ...@@ -321,7 +339,8 @@ static void emit_bpf_tail_call(u8 **pprog)
offsetof(struct bpf_prog, bpf_func)); offsetof(struct bpf_prog, bpf_func));
EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */
/* now we're ready to jump into next BPF program /*
* Now we're ready to jump into next BPF program
* rdi == ctx (1st arg) * rdi == ctx (1st arg)
* rax == prog->bpf_func + prologue_size * rax == prog->bpf_func + prologue_size
*/ */
...@@ -340,7 +359,8 @@ static void emit_load_skb_data_hlen(u8 **pprog) ...@@ -340,7 +359,8 @@ static void emit_load_skb_data_hlen(u8 **pprog)
u8 *prog = *pprog; u8 *prog = *pprog;
int cnt = 0; int cnt = 0;
/* r9d = skb->len - skb->data_len (headlen) /*
* r9d = skb->len - skb->data_len (headlen)
* r10 = skb->data * r10 = skb->data
*/ */
/* mov %r9d, off32(%rdi) */ /* mov %r9d, off32(%rdi) */
...@@ -361,7 +381,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate, ...@@ -361,7 +381,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
u8 b1, b2, b3; u8 b1, b2, b3;
int cnt = 0; int cnt = 0;
/* optimization: if imm32 is positive, use 'mov %eax, imm32' /*
* Optimization: if imm32 is positive, use 'mov %eax, imm32'
* (which zero-extends imm32) to save 2 bytes. * (which zero-extends imm32) to save 2 bytes.
*/ */
if (sign_propagate && (s32)imm32 < 0) { if (sign_propagate && (s32)imm32 < 0) {
...@@ -373,7 +394,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate, ...@@ -373,7 +394,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
goto done; goto done;
} }
/* optimization: if imm32 is zero, use 'xor %eax, %eax' /*
* Optimization: if imm32 is zero, use 'xor %eax, %eax'
* to save 3 bytes. * to save 3 bytes.
*/ */
if (imm32 == 0) { if (imm32 == 0) {
...@@ -400,7 +422,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg, ...@@ -400,7 +422,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
int cnt = 0; int cnt = 0;
if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
/* For emitting plain u32, where sign bit must not be /*
* For emitting plain u32, where sign bit must not be
* propagated LLVM tends to load imm64 over mov32 * propagated LLVM tends to load imm64 over mov32
* directly, so save couple of bytes by just doing * directly, so save couple of bytes by just doing
* 'mov %eax, imm32' instead. * 'mov %eax, imm32' instead.
...@@ -525,7 +548,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -525,7 +548,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
else if (is_ereg(dst_reg)) else if (is_ereg(dst_reg))
EMIT1(add_1mod(0x40, dst_reg)); EMIT1(add_1mod(0x40, dst_reg));
/* b3 holds 'normal' opcode, b2 short form only valid /*
* b3 holds 'normal' opcode, b2 short form only valid
* in case dst is eax/rax. * in case dst is eax/rax.
*/ */
switch (BPF_OP(insn->code)) { switch (BPF_OP(insn->code)) {
...@@ -593,7 +617,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -593,7 +617,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
/* mov rax, dst_reg */ /* mov rax, dst_reg */
EMIT_mov(BPF_REG_0, dst_reg); EMIT_mov(BPF_REG_0, dst_reg);
/* xor edx, edx /*
* xor edx, edx
* equivalent to 'xor rdx, rdx', but one byte less * equivalent to 'xor rdx, rdx', but one byte less
*/ */
EMIT2(0x31, 0xd2); EMIT2(0x31, 0xd2);
...@@ -655,7 +680,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -655,7 +680,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
} }
break; break;
} }
/* shifts */ /* Shifts */
case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU | BPF_RSH | BPF_K: case BPF_ALU | BPF_RSH | BPF_K:
case BPF_ALU | BPF_ARSH | BPF_K: case BPF_ALU | BPF_ARSH | BPF_K:
...@@ -686,7 +711,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -686,7 +711,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU64 | BPF_RSH | BPF_X: case BPF_ALU64 | BPF_RSH | BPF_X:
case BPF_ALU64 | BPF_ARSH | BPF_X: case BPF_ALU64 | BPF_ARSH | BPF_X:
/* check for bad case when dst_reg == rcx */ /* Check for bad case when dst_reg == rcx */
if (dst_reg == BPF_REG_4) { if (dst_reg == BPF_REG_4) {
/* mov r11, dst_reg */ /* mov r11, dst_reg */
EMIT_mov(AUX_REG, dst_reg); EMIT_mov(AUX_REG, dst_reg);
...@@ -724,13 +749,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -724,13 +749,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_END | BPF_FROM_BE: case BPF_ALU | BPF_END | BPF_FROM_BE:
switch (imm32) { switch (imm32) {
case 16: case 16:
/* emit 'ror %ax, 8' to swap lower 2 bytes */ /* Emit 'ror %ax, 8' to swap lower 2 bytes */
EMIT1(0x66); EMIT1(0x66);
if (is_ereg(dst_reg)) if (is_ereg(dst_reg))
EMIT1(0x41); EMIT1(0x41);
EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
/* emit 'movzwl eax, ax' */ /* Emit 'movzwl eax, ax' */
if (is_ereg(dst_reg)) if (is_ereg(dst_reg))
EMIT3(0x45, 0x0F, 0xB7); EMIT3(0x45, 0x0F, 0xB7);
else else
...@@ -738,7 +763,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -738,7 +763,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
break; break;
case 32: case 32:
/* emit 'bswap eax' to swap lower 4 bytes */ /* Emit 'bswap eax' to swap lower 4 bytes */
if (is_ereg(dst_reg)) if (is_ereg(dst_reg))
EMIT2(0x41, 0x0F); EMIT2(0x41, 0x0F);
else else
...@@ -746,7 +771,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -746,7 +771,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT1(add_1reg(0xC8, dst_reg)); EMIT1(add_1reg(0xC8, dst_reg));
break; break;
case 64: case 64:
/* emit 'bswap rax' to swap 8 bytes */ /* Emit 'bswap rax' to swap 8 bytes */
EMIT3(add_1mod(0x48, dst_reg), 0x0F, EMIT3(add_1mod(0x48, dst_reg), 0x0F,
add_1reg(0xC8, dst_reg)); add_1reg(0xC8, dst_reg));
break; break;
...@@ -756,7 +781,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -756,7 +781,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_END | BPF_FROM_LE: case BPF_ALU | BPF_END | BPF_FROM_LE:
switch (imm32) { switch (imm32) {
case 16: case 16:
/* emit 'movzwl eax, ax' to zero extend 16-bit /*
* Emit 'movzwl eax, ax' to zero extend 16-bit
* into 64 bit * into 64 bit
*/ */
if (is_ereg(dst_reg)) if (is_ereg(dst_reg))
...@@ -766,7 +792,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ...@@ -766,7 +792,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
break; break;
case 32: case 32:
/* emit 'mov eax, eax' to clear upper 32-bits */ /* Emit 'mov eax, eax' to clear upper 32-bits */
if (is_ereg(dst_reg)) if (is_ereg(dst_reg))
EMIT1(0x45); EMIT1(0x45);
EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
...@@ -809,9 +835,9 @@ st: if (is_imm8(insn->off)) ...@@ -809,9 +835,9 @@ st: if (is_imm8(insn->off))
/* STX: *(u8*)(dst_reg + off) = src_reg */ /* STX: *(u8*)(dst_reg + off) = src_reg */
case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_B:
/* emit 'mov byte ptr [rax + off], al' */ /* Emit 'mov byte ptr [rax + off], al' */
if (is_ereg(dst_reg) || is_ereg(src_reg) || if (is_ereg(dst_reg) || is_ereg(src_reg) ||
/* have to add extra byte for x86 SIL, DIL regs */ /* We have to add extra byte for x86 SIL, DIL regs */
src_reg == BPF_REG_1 || src_reg == BPF_REG_2) src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
else else
...@@ -840,25 +866,26 @@ stx: if (is_imm8(insn->off)) ...@@ -840,25 +866,26 @@ stx: if (is_imm8(insn->off))
/* LDX: dst_reg = *(u8*)(src_reg + off) */ /* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_B:
/* emit 'movzx rax, byte ptr [rax + off]' */ /* Emit 'movzx rax, byte ptr [rax + off]' */
EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
goto ldx; goto ldx;
case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_H:
/* emit 'movzx rax, word ptr [rax + off]' */ /* Emit 'movzx rax, word ptr [rax + off]' */
EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
goto ldx; goto ldx;
case BPF_LDX | BPF_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_W:
/* emit 'mov eax, dword ptr [rax+0x14]' */ /* Emit 'mov eax, dword ptr [rax+0x14]' */
if (is_ereg(dst_reg) || is_ereg(src_reg)) if (is_ereg(dst_reg) || is_ereg(src_reg))
EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
else else
EMIT1(0x8B); EMIT1(0x8B);
goto ldx; goto ldx;
case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_MEM | BPF_DW:
/* emit 'mov rax, qword ptr [rax+0x14]' */ /* Emit 'mov rax, qword ptr [rax+0x14]' */
EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
ldx: /* if insn->off == 0 we can save one extra byte, but ldx: /*
* special case of x86 r13 which always needs an offset * If insn->off == 0 we can save one extra byte, but
* special case of x86 R13 which always needs an offset
* is not worth the hassle * is not worth the hassle
*/ */
if (is_imm8(insn->off)) if (is_imm8(insn->off))
...@@ -870,7 +897,7 @@ stx: if (is_imm8(insn->off)) ...@@ -870,7 +897,7 @@ stx: if (is_imm8(insn->off))
/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
case BPF_STX | BPF_XADD | BPF_W: case BPF_STX | BPF_XADD | BPF_W:
/* emit 'lock add dword ptr [rax + off], eax' */ /* Emit 'lock add dword ptr [rax + off], eax' */
if (is_ereg(dst_reg) || is_ereg(src_reg)) if (is_ereg(dst_reg) || is_ereg(src_reg))
EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
else else
...@@ -897,14 +924,15 @@ xadd: if (is_imm8(insn->off)) ...@@ -897,14 +924,15 @@ xadd: if (is_imm8(insn->off))
} else { } else {
EMIT2(0x41, 0x52); /* push %r10 */ EMIT2(0x41, 0x52); /* push %r10 */
EMIT2(0x41, 0x51); /* push %r9 */ EMIT2(0x41, 0x51); /* push %r9 */
/* need to adjust jmp offset, since /*
* We need to adjust jmp offset, since
* pop %r9, pop %r10 take 4 bytes after call insn * pop %r9, pop %r10 take 4 bytes after call insn
*/ */
jmp_offset += 4; jmp_offset += 4;
} }
} }
if (!imm32 || !is_simm32(jmp_offset)) { if (!imm32 || !is_simm32(jmp_offset)) {
pr_err("unsupported bpf func %d addr %p image %p\n", pr_err("unsupported BPF func %d addr %p image %p\n",
imm32, func, image); imm32, func, image);
return -EINVAL; return -EINVAL;
} }
...@@ -970,7 +998,7 @@ xadd: if (is_imm8(insn->off)) ...@@ -970,7 +998,7 @@ xadd: if (is_imm8(insn->off))
else else
EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);
emit_cond_jmp: /* convert BPF opcode to x86 */ emit_cond_jmp: /* Convert BPF opcode to x86 */
switch (BPF_OP(insn->code)) { switch (BPF_OP(insn->code)) {
case BPF_JEQ: case BPF_JEQ:
jmp_cond = X86_JE; jmp_cond = X86_JE;
...@@ -996,22 +1024,22 @@ xadd: if (is_imm8(insn->off)) ...@@ -996,22 +1024,22 @@ xadd: if (is_imm8(insn->off))
jmp_cond = X86_JBE; jmp_cond = X86_JBE;
break; break;
case BPF_JSGT: case BPF_JSGT:
/* signed '>', GT in x86 */ /* Signed '>', GT in x86 */
jmp_cond = X86_JG; jmp_cond = X86_JG;
break; break;
case BPF_JSLT: case BPF_JSLT:
/* signed '<', LT in x86 */ /* Signed '<', LT in x86 */
jmp_cond = X86_JL; jmp_cond = X86_JL;
break; break;
case BPF_JSGE: case BPF_JSGE:
/* signed '>=', GE in x86 */ /* Signed '>=', GE in x86 */
jmp_cond = X86_JGE; jmp_cond = X86_JGE;
break; break;
case BPF_JSLE: case BPF_JSLE:
/* signed '<=', LE in x86 */ /* Signed '<=', LE in x86 */
jmp_cond = X86_JLE; jmp_cond = X86_JLE;
break; break;
default: /* to silence gcc warning */ default: /* to silence GCC warning */
return -EFAULT; return -EFAULT;
} }
jmp_offset = addrs[i + insn->off] - addrs[i]; jmp_offset = addrs[i + insn->off] - addrs[i];
...@@ -1039,7 +1067,7 @@ xadd: if (is_imm8(insn->off)) ...@@ -1039,7 +1067,7 @@ xadd: if (is_imm8(insn->off))
jmp_offset = addrs[i + insn->off] - addrs[i]; jmp_offset = addrs[i + insn->off] - addrs[i];
if (!jmp_offset) if (!jmp_offset)
/* optimize out nop jumps */ /* Optimize out nop jumps */
break; break;
emit_jmp: emit_jmp:
if (is_imm8(jmp_offset)) { if (is_imm8(jmp_offset)) {
...@@ -1061,7 +1089,7 @@ xadd: if (is_imm8(insn->off)) ...@@ -1061,7 +1089,7 @@ xadd: if (is_imm8(insn->off))
ctx->seen_ld_abs = seen_ld_abs = true; ctx->seen_ld_abs = seen_ld_abs = true;
jmp_offset = func - (image + addrs[i]); jmp_offset = func - (image + addrs[i]);
if (!func || !is_simm32(jmp_offset)) { if (!func || !is_simm32(jmp_offset)) {
pr_err("unsupported bpf func %d addr %p image %p\n", pr_err("unsupported BPF func %d addr %p image %p\n",
imm32, func, image); imm32, func, image);
return -EINVAL; return -EINVAL;
} }
...@@ -1080,7 +1108,8 @@ xadd: if (is_imm8(insn->off)) ...@@ -1080,7 +1108,8 @@ xadd: if (is_imm8(insn->off))
EMIT2_off32(0x81, 0xC6, imm32); EMIT2_off32(0x81, 0xC6, imm32);
} }
} }
/* skb pointer is in R6 (%rbx), it will be copied into /*
* skb pointer is in R6 (%rbx), it will be copied into
* %rdi if skb_copy_bits() call is necessary. * %rdi if skb_copy_bits() call is necessary.
* sk_load_* helpers also use %r10 and %r9d. * sk_load_* helpers also use %r10 and %r9d.
* See bpf_jit.S * See bpf_jit.S
...@@ -1111,7 +1140,7 @@ xadd: if (is_imm8(insn->off)) ...@@ -1111,7 +1140,7 @@ xadd: if (is_imm8(insn->off))
goto emit_jmp; goto emit_jmp;
} }
seen_exit = true; seen_exit = true;
/* update cleanup_addr */ /* Update cleanup_addr */
ctx->cleanup_addr = proglen; ctx->cleanup_addr = proglen;
/* mov rbx, qword ptr [rbp+0] */ /* mov rbx, qword ptr [rbp+0] */
EMIT4(0x48, 0x8B, 0x5D, 0); EMIT4(0x48, 0x8B, 0x5D, 0);
...@@ -1129,10 +1158,11 @@ xadd: if (is_imm8(insn->off)) ...@@ -1129,10 +1158,11 @@ xadd: if (is_imm8(insn->off))
break; break;
default: default:
/* By design x64 JIT should support all BPF instructions /*
* By design x86-64 JIT should support all BPF instructions.
* This error will be seen if new instruction was added * This error will be seen if new instruction was added
* to interpreter, but not to JIT * to the interpreter, but not to the JIT, or if there is
* or if there is junk in bpf_prog * junk in bpf_prog.
*/ */
pr_err("bpf_jit: unknown opcode %02x\n", insn->code); pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
return -EINVAL; return -EINVAL;
...@@ -1184,7 +1214,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ...@@ -1184,7 +1214,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
return orig_prog; return orig_prog;
tmp = bpf_jit_blind_constants(prog); tmp = bpf_jit_blind_constants(prog);
/* If blinding was requested and we failed during blinding, /*
* If blinding was requested and we failed during blinding,
* we must fall back to the interpreter. * we must fall back to the interpreter.
*/ */
if (IS_ERR(tmp)) if (IS_ERR(tmp))
...@@ -1218,8 +1249,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ...@@ -1218,8 +1249,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_addrs; goto out_addrs;
} }
/* Before first pass, make a rough estimation of addrs[] /*
* each bpf instruction is translated to less than 64 bytes * Before first pass, make a rough estimation of addrs[]
* each BPF instruction is translated to less than 64 bytes
*/ */
for (proglen = 0, i = 0; i < prog->len; i++) { for (proglen = 0, i = 0; i < prog->len; i++) {
proglen += 64; proglen += 64;
...@@ -1228,10 +1260,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ...@@ -1228,10 +1260,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.cleanup_addr = proglen; ctx.cleanup_addr = proglen;
skip_init_addrs: skip_init_addrs:
/* JITed image shrinks with every pass and the loop iterates /*
* until the image stops shrinking. Very large bpf programs * JITed image shrinks with every pass and the loop iterates
* until the image stops shrinking. Very large BPF programs
* may converge on the last pass. In such case do one more * may converge on the last pass. In such case do one more
* pass to emit the final image * pass to emit the final image.
*/ */
for (pass = 0; pass < 20 || image; pass++) { for (pass = 0; pass < 20 || image; pass++) {
proglen = do_jit(prog, addrs, image, oldproglen, &ctx); proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment