Commit e35e5b6f authored by Linus Torvalds

Merge tag 'xsa-5.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen security fixes from Juergen Gross:

 - XSA-403 (4 patches for blkfront and netfront drivers):

   Linux Block and Network PV device frontends don't zero memory regions
   before sharing them with the backend (CVE-2022-26365,
   CVE-2022-33740). Additionally, the granularity of the grant table
   doesn't allow sharing less than a 4K page, so unrelated data residing
   in the same 4K page as data shared with a backend is accessible to
   that backend (CVE-2022-33741, CVE-2022-33742).
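
   As a rough illustration of the mitigation pattern (a minimal sketch,
   not the actual driver code; the helper name is made up): data headed
   for the backend is copied into a freshly zeroed, dedicated page, so
   nothing else ever shares the granted 4K page.

       #include <linux/gfp.h>
       #include <linux/string.h>

       /* Copy @len bytes into a dedicated zeroed page before granting it,
        * so the remainder of the 4K page cannot leak unrelated data. */
       static void *bounce_for_grant(const void *data, size_t len)
       {
               void *page;

               if (len > PAGE_SIZE)
                       return NULL;
               page = (void *)get_zeroed_page(GFP_NOIO);
               if (!page)
                       return NULL;
               memcpy(page, data, len);
               return page;
       }

   The patches below follow this idea by allocating shared pages with
   __GFP_ZERO and, when the backend is not trusted, bouncing transmitted
   data into dedicated zeroed pages (see bounce_skb() in the netfront
   diff).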

 - XSA-405 (1 patch for netfront driver, only 5.10 and newer):

   While adding logic to support XDP (eXpress Data Path), a code label
   was moved in a way that allows SKBs, whose references (pointers) are
   retained for further processing, to nevertheless be freed.
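
   As a toy sketch of why the label position matters (simplified, the
   helper and the loop shape are made up; this is not the driver code):
   each SKB must be queued on the tracking list before any path can jump
   past the queueing, otherwise a pointer retained for later processing
   may end up referring to an SKB the list never tracked.

       #include <linux/skbuff.h>

       static void collect_slots(struct sk_buff_head *list,
                                 struct sk_buff **slots, int nr_slots)
       {
               int i;

               for (i = 0; i < nr_slots; i++) {
                       struct sk_buff *skb = slots[i];

                       /* Queue first, so the "error" path below cannot
                        * skip it and leave the SKB untracked. */
                       __skb_queue_tail(list, skb);
                       if (!skb_is_nonlinear(skb))
                               goto next;      /* stand-in for an error path */
                       /* further per-slot processing would go here */
       next:
                       ;
               }
       }

   The one-line netfront hunk below restores exactly that ordering by
   moving the next: label back after __skb_queue_tail().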

 - XSA-406 (1 patch for Arm specific dom0 code):

   When mapping pages of guests on Arm, dom0 uses an rbtree to keep
   track of the foreign mappings.

   Updating of that rbtree is not always done completely with the
   related lock held, resulting in a small race window, which can be
   used by unprivileged guests via PV devices to cause inconsistencies
   in the rbtree. These inconsistencies can lead to Denial of Service
   (DoS) of dom0, e.g. by causing crashes or the inability to perform
   further mappings of other guests' memory pages.
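
   The underlying fix pattern, shown as a condensed sketch with a toy
   lookup (not the actual Arm p2m code): the rbtree root must only be
   read after the lock has been taken, so a walk can never start from a
   root that was sampled before a concurrent update.

       #include <linux/rbtree.h>
       #include <linux/spinlock.h>

       struct entry {
               struct rb_node node;
               unsigned long key;
               unsigned long value;
       };

       static DEFINE_RWLOCK(tree_lock);
       static struct rb_root tree_root = RB_ROOT;

       static unsigned long lookup(unsigned long key)
       {
               struct rb_node *n;
               unsigned long flags, value = 0;

               read_lock_irqsave(&tree_lock, flags);
               n = tree_root.rb_node;  /* read the root under the lock */
               while (n) {
                       struct entry *e = rb_entry(n, struct entry, node);

                       if (key < e->key)
                               n = n->rb_left;
                       else if (key > e->key)
                               n = n->rb_right;
                       else {
                               value = e->value;
                               break;
                       }
               }
               read_unlock_irqrestore(&tree_lock, flags);
               return value;
       }

   The p2m.c hunks below apply exactly this: the assignment from
   phys_to_mach.rb_node is moved after read_lock_irqsave() and
   write_lock_irqsave() respectively.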

* tag 'xsa-5.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen/arm: Fix race in RB-tree based P2M accounting
  xen-netfront: restore __skb_queue_tail() positioning in xennet_get_responses()
  xen/blkfront: force data bouncing when backend is untrusted
  xen/netfront: force data bouncing when backend is untrusted
  xen/netfront: fix leaking data in shared pages
  xen/blkfront: fix leaking data in shared pages
parents c1084b6c b75cd218

--- a/arch/arm/xen/p2m.c
+++ b/arch/arm/xen/p2m.c
@@ -63,11 +63,12 @@ static int xen_add_phys_to_mach_entry(struct xen_p2m_entry *new)
 
 unsigned long __pfn_to_mfn(unsigned long pfn)
 {
-	struct rb_node *n = phys_to_mach.rb_node;
+	struct rb_node *n;
 	struct xen_p2m_entry *entry;
 	unsigned long irqflags;
 
 	read_lock_irqsave(&p2m_lock, irqflags);
+	n = phys_to_mach.rb_node;
 	while (n) {
 		entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
 		if (entry->pfn <= pfn &&
@@ -152,10 +153,11 @@ bool __set_phys_to_machine_multi(unsigned long pfn,
 	int rc;
 	unsigned long irqflags;
 	struct xen_p2m_entry *p2m_entry;
-	struct rb_node *n = phys_to_mach.rb_node;
+	struct rb_node *n;
 
 	if (mfn == INVALID_P2M_ENTRY) {
 		write_lock_irqsave(&p2m_lock, irqflags);
+		n = phys_to_mach.rb_node;
 		while (n) {
 			p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
 			if (p2m_entry->pfn <= pfn &&
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -152,6 +152,10 @@ static unsigned int xen_blkif_max_ring_order;
 module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 
+static bool __read_mostly xen_blkif_trusted = true;
+module_param_named(trusted, xen_blkif_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
 #define BLK_RING_SIZE(info)	\
 	__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
@@ -210,6 +214,7 @@ struct blkfront_info
 	unsigned int feature_discard:1;
 	unsigned int feature_secdiscard:1;
 	unsigned int feature_persistent:1;
+	unsigned int bounce:1;
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
 	/* Number of 4KB segments handled */
@@ -310,8 +315,8 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
 		if (!gnt_list_entry)
 			goto out_of_memory;
 
-		if (info->feature_persistent) {
-			granted_page = alloc_page(GFP_NOIO);
+		if (info->bounce) {
+			granted_page = alloc_page(GFP_NOIO | __GFP_ZERO);
 			if (!granted_page) {
 				kfree(gnt_list_entry);
 				goto out_of_memory;
@@ -330,7 +335,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
 	list_for_each_entry_safe(gnt_list_entry, n,
 				 &rinfo->grants, node) {
 		list_del(&gnt_list_entry->node);
-		if (info->feature_persistent)
+		if (info->bounce)
 			__free_page(gnt_list_entry->page);
 		kfree(gnt_list_entry);
 		i--;
@@ -376,7 +381,7 @@ static struct grant *get_grant(grant_ref_t *gref_head,
 	/* Assign a gref to this page */
 	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
 	BUG_ON(gnt_list_entry->gref == -ENOSPC);
-	if (info->feature_persistent)
+	if (info->bounce)
 		grant_foreign_access(gnt_list_entry, info);
 	else {
 		/* Grant access to the GFN passed by the caller */
@@ -400,7 +405,7 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head,
 	/* Assign a gref to this page */
 	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
 	BUG_ON(gnt_list_entry->gref == -ENOSPC);
-	if (!info->feature_persistent) {
+	if (!info->bounce) {
 		struct page *indirect_page;
 
 		/* Fetch a pre-allocated page to use for indirect grefs */
@@ -703,7 +708,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
 		.grant_idx = 0,
 		.segments = NULL,
 		.rinfo = rinfo,
-		.need_copy = rq_data_dir(req) && info->feature_persistent,
+		.need_copy = rq_data_dir(req) && info->bounce,
 	};
 
 	/*
@@ -981,11 +986,12 @@ static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
 			      info->feature_fua ? true : false);
-	pr_info("blkfront: %s: %s %s %s %s %s\n",
+	pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
 		info->gd->disk_name, flush_info(info),
 		"persistent grants:", info->feature_persistent ?
 		"enabled;" : "disabled;", "indirect descriptors:",
-		info->max_indirect_segments ? "enabled;" : "disabled;");
+		info->max_indirect_segments ? "enabled;" : "disabled;",
+		"bounce buffer:", info->bounce ? "enabled" : "disabled;");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -1207,7 +1213,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
 	if (!list_empty(&rinfo->indirect_pages)) {
 		struct page *indirect_page, *n;
 
-		BUG_ON(info->feature_persistent);
+		BUG_ON(info->bounce);
 		list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
 			list_del(&indirect_page->lru);
 			__free_page(indirect_page);
@@ -1224,7 +1230,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
 					  NULL);
 			rinfo->persistent_gnts_c--;
 		}
-		if (info->feature_persistent)
+		if (info->bounce)
 			__free_page(persistent_gnt->page);
 		kfree(persistent_gnt);
 	}
@@ -1245,7 +1251,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
 		for (j = 0; j < segs; j++) {
 			persistent_gnt = rinfo->shadow[i].grants_used[j];
 			gnttab_end_foreign_access(persistent_gnt->gref, NULL);
-			if (info->feature_persistent)
+			if (info->bounce)
 				__free_page(persistent_gnt->page);
 			kfree(persistent_gnt);
 		}
@@ -1428,7 +1434,7 @@ static int blkif_completion(unsigned long *id,
 	data.s = s;
 	num_sg = s->num_sg;
 
-	if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
+	if (bret->operation == BLKIF_OP_READ && info->bounce) {
 		for_each_sg(s->sg, sg, num_sg, i) {
 			BUG_ON(sg->offset + sg->length > PAGE_SIZE);
@@ -1487,7 +1493,7 @@ static int blkif_completion(unsigned long *id,
 				 * Add the used indirect page back to the list of
 				 * available pages for indirect grefs.
 				 */
-				if (!info->feature_persistent) {
+				if (!info->bounce) {
 					indirect_page = s->indirect_grants[i]->page;
 					list_add(&indirect_page->lru, &rinfo->indirect_pages);
 				}
@@ -1764,6 +1770,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
 	if (!info)
 		return -ENODEV;
 
+	/* Check if backend is trusted. */
+	info->bounce = !xen_blkif_trusted ||
+		       !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
 	max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
 					      "max-ring-page-order", 0);
 	ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
@@ -2173,17 +2183,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
 	if (err)
 		goto out_of_memory;
 
-	if (!info->feature_persistent && info->max_indirect_segments) {
+	if (!info->bounce && info->max_indirect_segments) {
 		/*
-		 * We are using indirect descriptors but not persistent
-		 * grants, we need to allocate a set of pages that can be
+		 * We are using indirect descriptors but don't have a bounce
+		 * buffer, we need to allocate a set of pages that can be
 		 * used for mapping indirect grefs
 		 */
 		int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
 
 		BUG_ON(!list_empty(&rinfo->indirect_pages));
 		for (i = 0; i < num; i++) {
-			struct page *indirect_page = alloc_page(GFP_KERNEL);
+			struct page *indirect_page = alloc_page(GFP_KERNEL |
+								__GFP_ZERO);
 			if (!indirect_page)
 				goto out_of_memory;
 			list_add(&indirect_page->lru, &rinfo->indirect_pages);
@@ -2276,6 +2287,8 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
 	info->feature_persistent =
 		!!xenbus_read_unsigned(info->xbdev->otherend,
 				       "feature-persistent", 0);
+	if (info->feature_persistent)
+		info->bounce = true;
 
 	indirect_segments = xenbus_read_unsigned(info->xbdev->otherend,
 					"feature-max-indirect-segments", 0);
@@ -2547,6 +2560,13 @@ static void blkfront_delay_work(struct work_struct *work)
 	struct blkfront_info *info;
 	bool need_schedule_work = false;
 
+	/*
+	 * Note that when using bounce buffers but not persistent grants
+	 * there's no need to run blkfront_delay_work because grants are
+	 * revoked in blkif_completion or else an error is reported and the
+	 * connection is closed.
+	 */
+
 	mutex_lock(&blkfront_mutex);
 
 	list_for_each_entry(info, &info_list, info_list) {
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -66,6 +66,10 @@ module_param_named(max_queues, xennet_max_queues, uint, 0644);
 MODULE_PARM_DESC(max_queues,
 		 "Maximum number of queues per virtual interface");
 
+static bool __read_mostly xennet_trusted = true;
+module_param_named(trusted, xennet_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
 #define XENNET_TIMEOUT	(5 * HZ)
 
 static const struct ethtool_ops xennet_ethtool_ops;
@@ -173,6 +177,9 @@ struct netfront_info {
 	/* Is device behaving sane? */
 	bool broken;
 
+	/* Should skbs be bounced into a zeroed buffer? */
+	bool bounce;
+
 	atomic_t rx_gso_checksum_fixup;
 };
@@ -271,7 +278,8 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
 	if (unlikely(!skb))
 		return NULL;
 
-	page = page_pool_dev_alloc_pages(queue->page_pool);
+	page = page_pool_alloc_pages(queue->page_pool,
+				     GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
 	if (unlikely(!page)) {
 		kfree_skb(skb);
 		return NULL;
@@ -665,6 +673,33 @@ static int xennet_xdp_xmit(struct net_device *dev, int n,
 	return nxmit;
 }
 
+struct sk_buff *bounce_skb(const struct sk_buff *skb)
+{
+	unsigned int headerlen = skb_headroom(skb);
+	/* Align size to allocate full pages and avoid contiguous data leaks */
+	unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len,
+				  XEN_PAGE_SIZE);
+	struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO);
+
+	if (!n)
+		return NULL;
+
+	if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) {
+		WARN_ONCE(1, "misaligned skb allocated\n");
+		kfree_skb(n);
+		return NULL;
+	}
+
+	/* Set the data pointer */
+	skb_reserve(n, headerlen);
+	/* Set the tail pointer and length */
+	skb_put(n, skb->len);
+
+	BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
+
+	skb_copy_header(n, skb);
+	return n;
+}
+
 #define MAX_XEN_SKB_FRAGS	(65536 / XEN_PAGE_SIZE + 1)
@@ -718,9 +753,13 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
 	/* The first req should be at least ETH_HLEN size or the packet will be
 	 * dropped by netback.
+	 *
+	 * If the backend is not trusted bounce all data to zeroed pages to
+	 * avoid exposing contiguous data on the granted page not belonging to
+	 * the skb.
 	 */
-	if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
-		nskb = skb_copy(skb, GFP_ATOMIC);
+	if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
+		nskb = bounce_skb(skb);
 		if (!nskb)
 			goto drop;
 		dev_consume_skb_any(skb);
@@ -1053,8 +1092,10 @@ static int xennet_get_responses(struct netfront_queue *queue,
 			}
 		}
 		rcu_read_unlock();
-next:
+
 		__skb_queue_tail(list, skb);
+
+next:
 		if (!(rx->flags & XEN_NETRXF_more_data))
 			break;
@@ -2214,6 +2255,10 @@ static int talk_to_netback(struct xenbus_device *dev,
 	info->netdev->irq = 0;
 
+	/* Check if backend is trusted. */
+	info->bounce = !xennet_trusted ||
+		       !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
 	/* Check if backend supports multiple queues */
 	max_queues = xenbus_read_unsigned(info->xbdev->otherend,
 					  "multi-queue-max-queues", 1);
@@ -2381,6 +2426,9 @@ static int xennet_connect(struct net_device *dev)
 		return err;
 	if (np->netback_has_xdp_headroom)
 		pr_info("backend supports XDP headroom\n");
+	if (np->bounce)
+		dev_info(&np->xbdev->dev,
+			 "bouncing transmitted data to zeroed pages\n");
 
 	/* talk_to_netback() sets the correct number of queues */
 	num_queues = dev->real_num_tx_queues;