Commit 6a965666 authored by Linus Torvalds

Merge tag 'notifications-pipe-prep-20191115' of...

Merge tag 'notifications-pipe-prep-20191115' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs

Pull pipe rework from David Howells:
 "This is my set of preparatory patches for building a general
  notification queue on top of pipes. It makes a number of significant
  changes:

   - It removes the nr_exclusive argument from __wake_up_sync_key() as
     this is always 1. This prepares for the next step:

   - Adds wake_up_interruptible_sync_poll_locked() so that poll can be
     woken up from a function that's holding the poll waitqueue
     spinlock.

   - Change the pipe buffer ring to be managed in terms of unbounded
     head and tail indices rather than bounded index and length. This
     means that reading the pipe only needs to modify one index, not
     two.

   - A selection of helper functions are provided to query the state of
     the pipe buffer, plus a couple to apply updates to the pipe
     indices.

   - The pipe ring is allowed to have kernel-reserved slots. This allows
     many notification messages to be spliced in by the kernel without
     allowing userspace to pin too many pages if it writes to the same
     pipe.

   - Advance the head and tail indices inside the pipe waitqueue lock
     and use wake_up_interruptible_sync_poll_locked() to poke poll
     without having to take the lock twice.

   - Rearrange pipe_write() to preallocate the buffer it is going to
     write into and then drop the spinlock. This allows kernel
     notifications to then be added to the ring whilst it is filling the
     buffer it allocated. The read side is stalled because the pipe
     mutex is still held.

   - Don't wake up readers on a pipe if there was already data in it
     when we added more.

   - Don't wake up writers on a pipe if the ring wasn't full before we
     removed a buffer."

* tag 'notifications-pipe-prep-20191115' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  pipe: Remove sync on wake_ups
  pipe: Increase the writer-wakeup threshold to reduce context-switch count
  pipe: Check for ring full inside of the spinlock in pipe_write()
  pipe: Remove redundant wakeup from pipe_write()
  pipe: Rearrange sequence in pipe_write() to preallocate slot
  pipe: Conditionalise wakeup in pipe_read()
  pipe: Advance tail pointer inside of wait spinlock in pipe_read()
  pipe: Allow pipes to have kernel-reserved slots
  pipe: Use head and tail pointers for the ring, not cursor and length
  Add wake_up_interruptible_sync_poll_locked()
  Remove the nr_exclusive argument from __wake_up_sync_key()
  pipe: Reduce #inclusion of pipe_fs_i.h
parents 32ef9553 3c0edea9
...@@ -919,6 +919,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, ...@@ -919,6 +919,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
.pos = *ppos, .pos = *ppos,
.u.data = &sgl, .u.data = &sgl,
}; };
unsigned int occupancy;
/* /*
* Rproc_serial does not yet support splice. To support splice * Rproc_serial does not yet support splice. To support splice
...@@ -929,21 +930,18 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, ...@@ -929,21 +930,18 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
if (is_rproc_serial(port->out_vq->vdev)) if (is_rproc_serial(port->out_vq->vdev))
return -EINVAL; return -EINVAL;
/*
* pipe->nrbufs == 0 means there are no data to transfer,
* so this returns just 0 for no data.
*/
pipe_lock(pipe); pipe_lock(pipe);
if (!pipe->nrbufs) { ret = 0;
ret = 0; if (pipe_empty(pipe->head, pipe->tail))
goto error_out; goto error_out;
}
ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK); ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK);
if (ret < 0) if (ret < 0)
goto error_out; goto error_out;
buf = alloc_buf(port->portdev->vdev, 0, pipe->nrbufs); occupancy = pipe_occupancy(pipe->head, pipe->tail);
buf = alloc_buf(port->portdev->vdev, 0, occupancy);
if (!buf) { if (!buf) {
ret = -ENOMEM; ret = -ENOMEM;
goto error_out; goto error_out;
...@@ -951,7 +949,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, ...@@ -951,7 +949,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
sgl.n = 0; sgl.n = 0;
sgl.len = 0; sgl.len = 0;
sgl.size = pipe->nrbufs; sgl.size = occupancy;
sgl.sg = buf->sg; sgl.sg = buf->sg;
sg_init_table(sgl.sg, sgl.size); sg_init_table(sgl.sg, sgl.size);
ret = __splice_from_pipe(pipe, &sd, pipe_to_sg); ret = __splice_from_pipe(pipe, &sd, pipe_to_sg);
......
...@@ -59,7 +59,6 @@ ...@@ -59,7 +59,6 @@
#include <linux/kmod.h> #include <linux/kmod.h>
#include <linux/fsnotify.h> #include <linux/fsnotify.h>
#include <linux/fs_struct.h> #include <linux/fs_struct.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h> #include <linux/oom.h>
#include <linux/compat.h> #include <linux/compat.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
......
...@@ -705,7 +705,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) ...@@ -705,7 +705,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->pipebufs++; cs->pipebufs++;
cs->nr_segs--; cs->nr_segs--;
} else { } else {
if (cs->nr_segs == cs->pipe->buffers) if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO; return -EIO;
page = alloc_page(GFP_HIGHUSER); page = alloc_page(GFP_HIGHUSER);
...@@ -881,7 +881,7 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, ...@@ -881,7 +881,7 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
struct pipe_buffer *buf; struct pipe_buffer *buf;
int err; int err;
if (cs->nr_segs == cs->pipe->buffers) if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO; return -EIO;
err = unlock_request(cs->req); err = unlock_request(cs->req);
...@@ -1343,7 +1343,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, ...@@ -1343,7 +1343,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (!fud) if (!fud)
return -EPERM; return -EPERM;
bufs = kvmalloc_array(pipe->buffers, sizeof(struct pipe_buffer), bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
GFP_KERNEL); GFP_KERNEL);
if (!bufs) if (!bufs)
return -ENOMEM; return -ENOMEM;
...@@ -1355,7 +1355,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, ...@@ -1355,7 +1355,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (ret < 0) if (ret < 0)
goto out; goto out;
if (pipe->nrbufs + cs.nr_segs > pipe->buffers) { if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) {
ret = -EIO; ret = -EIO;
goto out; goto out;
} }
...@@ -1937,6 +1937,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, ...@@ -1937,6 +1937,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos, struct file *out, loff_t *ppos,
size_t len, unsigned int flags) size_t len, unsigned int flags)
{ {
unsigned int head, tail, mask, count;
unsigned nbuf; unsigned nbuf;
unsigned idx; unsigned idx;
struct pipe_buffer *bufs; struct pipe_buffer *bufs;
...@@ -1951,8 +1952,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, ...@@ -1951,8 +1952,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe_lock(pipe); pipe_lock(pipe);
bufs = kvmalloc_array(pipe->nrbufs, sizeof(struct pipe_buffer), head = pipe->head;
GFP_KERNEL); tail = pipe->tail;
mask = pipe->ring_size - 1;
count = head - tail;
bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
if (!bufs) { if (!bufs) {
pipe_unlock(pipe); pipe_unlock(pipe);
return -ENOMEM; return -ENOMEM;
...@@ -1960,8 +1965,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, ...@@ -1960,8 +1965,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
nbuf = 0; nbuf = 0;
rem = 0; rem = 0;
for (idx = 0; idx < pipe->nrbufs && rem < len; idx++) for (idx = tail; idx < head && rem < len; idx++)
rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len; rem += pipe->bufs[idx & mask].len;
ret = -EINVAL; ret = -EINVAL;
if (rem < len) if (rem < len)
...@@ -1972,16 +1977,16 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, ...@@ -1972,16 +1977,16 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
struct pipe_buffer *ibuf; struct pipe_buffer *ibuf;
struct pipe_buffer *obuf; struct pipe_buffer *obuf;
BUG_ON(nbuf >= pipe->buffers); BUG_ON(nbuf >= pipe->ring_size);
BUG_ON(!pipe->nrbufs); BUG_ON(tail == head);
ibuf = &pipe->bufs[pipe->curbuf]; ibuf = &pipe->bufs[tail & mask];
obuf = &bufs[nbuf]; obuf = &bufs[nbuf];
if (rem >= ibuf->len) { if (rem >= ibuf->len) {
*obuf = *ibuf; *obuf = *ibuf;
ibuf->ops = NULL; ibuf->ops = NULL;
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); tail++;
pipe->nrbufs--; pipe->tail = tail;
} else { } else {
if (!pipe_buf_get(pipe, ibuf)) if (!pipe_buf_get(pipe, ibuf))
goto out_free; goto out_free;
......
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <asm/byteorder.h> #include <asm/byteorder.h>
#include <linux/swap.h> #include <linux/swap.h>
#include <linux/pipe_fs_i.h>
#include <linux/mpage.h> #include <linux/mpage.h>
#include <linux/quotaops.h> #include <linux/quotaops.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
......
...@@ -43,10 +43,12 @@ unsigned long pipe_user_pages_hard; ...@@ -43,10 +43,12 @@ unsigned long pipe_user_pages_hard;
unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
/* /*
* We use a start+len construction, which provides full use of the * We use head and tail indices that aren't masked off, except at the point of
* allocated memory. * dereference, but rather they're allowed to wrap naturally. This means there
* -- Florian Coosmann (FGC) * isn't a dead spot in the buffer, but the ring has to be a power of two and
* * <= 2^31.
* -- David Howells 2019-09-23.
*
* Reads with count = 0 should always return 0. * Reads with count = 0 should always return 0.
* -- Julian Bradfield 1999-06-07. * -- Julian Bradfield 1999-06-07.
* *
...@@ -285,10 +287,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) ...@@ -285,10 +287,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
ret = 0; ret = 0;
__pipe_lock(pipe); __pipe_lock(pipe);
for (;;) { for (;;) {
int bufs = pipe->nrbufs; unsigned int head = pipe->head;
if (bufs) { unsigned int tail = pipe->tail;
int curbuf = pipe->curbuf; unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf = pipe->bufs + curbuf;
if (!pipe_empty(head, tail)) {
struct pipe_buffer *buf = &pipe->bufs[tail & mask];
size_t chars = buf->len; size_t chars = buf->len;
size_t written; size_t written;
int error; int error;
...@@ -320,18 +324,27 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) ...@@ -320,18 +324,27 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
} }
if (!buf->len) { if (!buf->len) {
bool wake;
pipe_buf_release(pipe, buf); pipe_buf_release(pipe, buf);
curbuf = (curbuf + 1) & (pipe->buffers - 1); spin_lock_irq(&pipe->wait.lock);
pipe->curbuf = curbuf; tail++;
pipe->nrbufs = --bufs; pipe->tail = tail;
do_wakeup = 1; do_wakeup = 1;
wake = head - (tail - 1) == pipe->max_usage / 2;
if (wake)
wake_up_locked_poll(
&pipe->wait, EPOLLOUT | EPOLLWRNORM);
spin_unlock_irq(&pipe->wait.lock);
if (wake)
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
} }
total_len -= chars; total_len -= chars;
if (!total_len) if (!total_len)
break; /* common path: read succeeded */ break; /* common path: read succeeded */
if (!pipe_empty(head, tail)) /* More to do? */
continue;
} }
if (bufs) /* More to do? */
continue;
if (!pipe->writers) if (!pipe->writers)
break; break;
if (!pipe->waiting_writers) { if (!pipe->waiting_writers) {
...@@ -352,17 +365,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) ...@@ -352,17 +365,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
break; break;
} }
if (do_wakeup) {
wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
pipe_wait(pipe); pipe_wait(pipe);
} }
__pipe_unlock(pipe); __pipe_unlock(pipe);
/* Signal writers asynchronously that there is more room. */ /* Signal writers asynchronously that there is more room. */
if (do_wakeup) { if (do_wakeup) {
wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM); wake_up_interruptible_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
} }
if (ret > 0) if (ret > 0)
...@@ -380,6 +389,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -380,6 +389,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
{ {
struct file *filp = iocb->ki_filp; struct file *filp = iocb->ki_filp;
struct pipe_inode_info *pipe = filp->private_data; struct pipe_inode_info *pipe = filp->private_data;
unsigned int head, max_usage, mask;
ssize_t ret = 0; ssize_t ret = 0;
int do_wakeup = 0; int do_wakeup = 0;
size_t total_len = iov_iter_count(from); size_t total_len = iov_iter_count(from);
...@@ -397,12 +407,14 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -397,12 +407,14 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
goto out; goto out;
} }
head = pipe->head;
max_usage = pipe->max_usage;
mask = pipe->ring_size - 1;
/* We try to merge small writes */ /* We try to merge small writes */
chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
if (pipe->nrbufs && chars != 0) { if (!pipe_empty(head, pipe->tail) && chars != 0) {
int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
(pipe->buffers - 1);
struct pipe_buffer *buf = pipe->bufs + lastbuf;
int offset = buf->offset + buf->len; int offset = buf->offset + buf->len;
if (pipe_buf_can_merge(buf) && offset + chars <= PAGE_SIZE) { if (pipe_buf_can_merge(buf) && offset + chars <= PAGE_SIZE) {
...@@ -423,18 +435,16 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -423,18 +435,16 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
} }
for (;;) { for (;;) {
int bufs;
if (!pipe->readers) { if (!pipe->readers) {
send_sig(SIGPIPE, current, 0); send_sig(SIGPIPE, current, 0);
if (!ret) if (!ret)
ret = -EPIPE; ret = -EPIPE;
break; break;
} }
bufs = pipe->nrbufs;
if (bufs < pipe->buffers) { head = pipe->head;
int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1); if (!pipe_full(head, pipe->tail, max_usage)) {
struct pipe_buffer *buf = pipe->bufs + newbuf; struct pipe_buffer *buf = &pipe->bufs[head & mask];
struct page *page = pipe->tmp_page; struct page *page = pipe->tmp_page;
int copied; int copied;
...@@ -446,38 +456,64 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -446,38 +456,64 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
} }
pipe->tmp_page = page; pipe->tmp_page = page;
} }
/* Allocate a slot in the ring in advance and attach an
* empty buffer. If we fault or otherwise fail to use
* it, either the reader will consume it or it'll still
* be there for the next write.
*/
spin_lock_irq(&pipe->wait.lock);
head = pipe->head;
if (pipe_full(head, pipe->tail, max_usage)) {
spin_unlock_irq(&pipe->wait.lock);
continue;
}
pipe->head = head + 1;
/* Always wake up, even if the copy fails. Otherwise /* Always wake up, even if the copy fails. Otherwise
* we lock up (O_NONBLOCK-)readers that sleep due to * we lock up (O_NONBLOCK-)readers that sleep due to
* syscall merging. * syscall merging.
* FIXME! Is this really true? * FIXME! Is this really true?
*/ */
do_wakeup = 1; wake_up_locked_poll(
copied = copy_page_from_iter(page, 0, PAGE_SIZE, from); &pipe->wait, EPOLLIN | EPOLLRDNORM);
if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
if (!ret) spin_unlock_irq(&pipe->wait.lock);
ret = -EFAULT; kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
break;
}
ret += copied;
/* Insert it into the buffer array */ /* Insert it into the buffer array */
buf = &pipe->bufs[head & mask];
buf->page = page; buf->page = page;
buf->ops = &anon_pipe_buf_ops; buf->ops = &anon_pipe_buf_ops;
buf->offset = 0; buf->offset = 0;
buf->len = copied; buf->len = 0;
buf->flags = 0; buf->flags = 0;
if (is_packetized(filp)) { if (is_packetized(filp)) {
buf->ops = &packet_pipe_buf_ops; buf->ops = &packet_pipe_buf_ops;
buf->flags = PIPE_BUF_FLAG_PACKET; buf->flags = PIPE_BUF_FLAG_PACKET;
} }
pipe->nrbufs = ++bufs;
pipe->tmp_page = NULL; pipe->tmp_page = NULL;
copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
if (!ret)
ret = -EFAULT;
break;
}
ret += copied;
buf->offset = 0;
buf->len = copied;
if (!iov_iter_count(from)) if (!iov_iter_count(from))
break; break;
} }
if (bufs < pipe->buffers)
if (!pipe_full(head, pipe->tail, max_usage))
continue; continue;
/* Wait for buffer space to become available. */
if (filp->f_flags & O_NONBLOCK) { if (filp->f_flags & O_NONBLOCK) {
if (!ret) if (!ret)
ret = -EAGAIN; ret = -EAGAIN;
...@@ -488,11 +524,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -488,11 +524,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
break; break;
} }
if (do_wakeup) {
wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
do_wakeup = 0;
}
pipe->waiting_writers++; pipe->waiting_writers++;
pipe_wait(pipe); pipe_wait(pipe);
pipe->waiting_writers--; pipe->waiting_writers--;
...@@ -500,7 +531,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -500,7 +531,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
out: out:
__pipe_unlock(pipe); __pipe_unlock(pipe);
if (do_wakeup) { if (do_wakeup) {
wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM); wake_up_interruptible_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
} }
if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) { if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
...@@ -515,17 +546,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -515,17 +546,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{ {
struct pipe_inode_info *pipe = filp->private_data; struct pipe_inode_info *pipe = filp->private_data;
int count, buf, nrbufs; int count, head, tail, mask;
switch (cmd) { switch (cmd) {
case FIONREAD: case FIONREAD:
__pipe_lock(pipe); __pipe_lock(pipe);
count = 0; count = 0;
buf = pipe->curbuf; head = pipe->head;
nrbufs = pipe->nrbufs; tail = pipe->tail;
while (--nrbufs >= 0) { mask = pipe->ring_size - 1;
count += pipe->bufs[buf].len;
buf = (buf+1) & (pipe->buffers - 1); while (tail != head) {
count += pipe->bufs[tail & mask].len;
tail++;
} }
__pipe_unlock(pipe); __pipe_unlock(pipe);
...@@ -541,21 +574,25 @@ pipe_poll(struct file *filp, poll_table *wait) ...@@ -541,21 +574,25 @@ pipe_poll(struct file *filp, poll_table *wait)
{ {
__poll_t mask; __poll_t mask;
struct pipe_inode_info *pipe = filp->private_data; struct pipe_inode_info *pipe = filp->private_data;
int nrbufs; unsigned int head = READ_ONCE(pipe->head);
unsigned int tail = READ_ONCE(pipe->tail);
poll_wait(filp, &pipe->wait, wait); poll_wait(filp, &pipe->wait, wait);
BUG_ON(pipe_occupancy(head, tail) > pipe->ring_size);
/* Reading only -- no need for acquiring the semaphore. */ /* Reading only -- no need for acquiring the semaphore. */
nrbufs = pipe->nrbufs;
mask = 0; mask = 0;
if (filp->f_mode & FMODE_READ) { if (filp->f_mode & FMODE_READ) {
mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0; if (!pipe_empty(head, tail))
mask |= EPOLLIN | EPOLLRDNORM;
if (!pipe->writers && filp->f_version != pipe->w_counter) if (!pipe->writers && filp->f_version != pipe->w_counter)
mask |= EPOLLHUP; mask |= EPOLLHUP;
} }
if (filp->f_mode & FMODE_WRITE) { if (filp->f_mode & FMODE_WRITE) {
mask |= (nrbufs < pipe->buffers) ? EPOLLOUT | EPOLLWRNORM : 0; if (!pipe_full(head, tail, pipe->max_usage))
mask |= EPOLLOUT | EPOLLWRNORM;
/* /*
* Most Unices do not set EPOLLERR for FIFOs but on Linux they * Most Unices do not set EPOLLERR for FIFOs but on Linux they
* behave exactly like pipes for poll(). * behave exactly like pipes for poll().
...@@ -679,7 +716,8 @@ struct pipe_inode_info *alloc_pipe_info(void) ...@@ -679,7 +716,8 @@ struct pipe_inode_info *alloc_pipe_info(void)
if (pipe->bufs) { if (pipe->bufs) {
init_waitqueue_head(&pipe->wait); init_waitqueue_head(&pipe->wait);
pipe->r_counter = pipe->w_counter = 1; pipe->r_counter = pipe->w_counter = 1;
pipe->buffers = pipe_bufs; pipe->max_usage = pipe_bufs;
pipe->ring_size = pipe_bufs;
pipe->user = user; pipe->user = user;
mutex_init(&pipe->mutex); mutex_init(&pipe->mutex);
return pipe; return pipe;
...@@ -697,9 +735,9 @@ void free_pipe_info(struct pipe_inode_info *pipe) ...@@ -697,9 +735,9 @@ void free_pipe_info(struct pipe_inode_info *pipe)
{ {
int i; int i;
(void) account_pipe_buffers(pipe->user, pipe->buffers, 0); (void) account_pipe_buffers(pipe->user, pipe->ring_size, 0);
free_uid(pipe->user); free_uid(pipe->user);
for (i = 0; i < pipe->buffers; i++) { for (i = 0; i < pipe->ring_size; i++) {
struct pipe_buffer *buf = pipe->bufs + i; struct pipe_buffer *buf = pipe->bufs + i;
if (buf->ops) if (buf->ops)
pipe_buf_release(pipe, buf); pipe_buf_release(pipe, buf);
...@@ -882,7 +920,7 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) ...@@ -882,7 +920,7 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
{ {
int cur = *cnt; int cur = *cnt;
while (cur == *cnt) { while (cur == *cnt) {
pipe_wait(pipe); pipe_wait(pipe);
...@@ -957,7 +995,7 @@ static int fifo_open(struct inode *inode, struct file *filp) ...@@ -957,7 +995,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
} }
} }
break; break;
case FMODE_WRITE: case FMODE_WRITE:
/* /*
* O_WRONLY * O_WRONLY
...@@ -977,7 +1015,7 @@ static int fifo_open(struct inode *inode, struct file *filp) ...@@ -977,7 +1015,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
goto err_wr; goto err_wr;
} }
break; break;
case FMODE_READ | FMODE_WRITE: case FMODE_READ | FMODE_WRITE:
/* /*
* O_RDWR * O_RDWR
...@@ -1056,14 +1094,14 @@ unsigned int round_pipe_size(unsigned long size) ...@@ -1056,14 +1094,14 @@ unsigned int round_pipe_size(unsigned long size)
static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
{ {
struct pipe_buffer *bufs; struct pipe_buffer *bufs;
unsigned int size, nr_pages; unsigned int size, nr_slots, head, tail, mask, n;
unsigned long user_bufs; unsigned long user_bufs;
long ret = 0; long ret = 0;
size = round_pipe_size(arg); size = round_pipe_size(arg);
nr_pages = size >> PAGE_SHIFT; nr_slots = size >> PAGE_SHIFT;
if (!nr_pages) if (!nr_slots)
return -EINVAL; return -EINVAL;
/* /*
...@@ -1073,13 +1111,13 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) ...@@ -1073,13 +1111,13 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
* Decreasing the pipe capacity is always permitted, even * Decreasing the pipe capacity is always permitted, even
* if the user is currently over a limit. * if the user is currently over a limit.
*/ */
if (nr_pages > pipe->buffers && if (nr_slots > pipe->ring_size &&
size > pipe_max_size && !capable(CAP_SYS_RESOURCE)) size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
return -EPERM; return -EPERM;
user_bufs = account_pipe_buffers(pipe->user, pipe->buffers, nr_pages); user_bufs = account_pipe_buffers(pipe->user, pipe->ring_size, nr_slots);
if (nr_pages > pipe->buffers && if (nr_slots > pipe->ring_size &&
(too_many_pipe_buffers_hard(user_bufs) || (too_many_pipe_buffers_hard(user_bufs) ||
too_many_pipe_buffers_soft(user_bufs)) && too_many_pipe_buffers_soft(user_bufs)) &&
is_unprivileged_user()) { is_unprivileged_user()) {
...@@ -1088,17 +1126,21 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) ...@@ -1088,17 +1126,21 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
} }
/* /*
* We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't * We can shrink the pipe, if arg is greater than the ring occupancy.
* expect a lot of shrink+grow operations, just free and allocate * Since we don't expect a lot of shrink+grow operations, just free and
* again like we would do for growing. If the pipe currently * allocate again like we would do for growing. If the pipe currently
* contains more buffers than arg, then return busy. * contains more buffers than arg, then return busy.
*/ */
if (nr_pages < pipe->nrbufs) { mask = pipe->ring_size - 1;
head = pipe->head;
tail = pipe->tail;
n = pipe_occupancy(pipe->head, pipe->tail);
if (nr_slots < n) {
ret = -EBUSY; ret = -EBUSY;
goto out_revert_acct; goto out_revert_acct;
} }
bufs = kcalloc(nr_pages, sizeof(*bufs), bufs = kcalloc(nr_slots, sizeof(*bufs),
GFP_KERNEL_ACCOUNT | __GFP_NOWARN); GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (unlikely(!bufs)) { if (unlikely(!bufs)) {
ret = -ENOMEM; ret = -ENOMEM;
...@@ -1107,33 +1149,37 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) ...@@ -1107,33 +1149,37 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
/* /*
* The pipe array wraps around, so just start the new one at zero * The pipe array wraps around, so just start the new one at zero
* and adjust the indexes. * and adjust the indices.
*/ */
if (pipe->nrbufs) { if (n > 0) {
unsigned int tail; unsigned int h = head & mask;
unsigned int head; unsigned int t = tail & mask;
if (h > t) {
tail = pipe->curbuf + pipe->nrbufs; memcpy(bufs, pipe->bufs + t,
if (tail < pipe->buffers) n * sizeof(struct pipe_buffer));
tail = 0; } else {
else unsigned int tsize = pipe->ring_size - t;
tail &= (pipe->buffers - 1); if (h > 0)
memcpy(bufs + tsize, pipe->bufs,
head = pipe->nrbufs - tail; h * sizeof(struct pipe_buffer));
if (head) memcpy(bufs, pipe->bufs + t,
memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer)); tsize * sizeof(struct pipe_buffer));
if (tail) }
memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
} }
pipe->curbuf = 0; head = n;
tail = 0;
kfree(pipe->bufs); kfree(pipe->bufs);
pipe->bufs = bufs; pipe->bufs = bufs;
pipe->buffers = nr_pages; pipe->ring_size = nr_slots;
return nr_pages * PAGE_SIZE; pipe->max_usage = nr_slots;
pipe->tail = tail;
pipe->head = head;
return pipe->max_usage * PAGE_SIZE;
out_revert_acct: out_revert_acct:
(void) account_pipe_buffers(pipe->user, nr_pages, pipe->buffers); (void) account_pipe_buffers(pipe->user, nr_slots, pipe->ring_size);
return ret; return ret;
} }
...@@ -1163,7 +1209,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) ...@@ -1163,7 +1209,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
ret = pipe_set_size(pipe, arg); ret = pipe_set_size(pipe, arg);
break; break;
case F_GETPIPE_SZ: case F_GETPIPE_SZ:
ret = pipe->buffers * PAGE_SIZE; ret = pipe->max_usage * PAGE_SIZE;
break; break;
default: default:
ret = -EINVAL; ret = -EINVAL;
......
...@@ -185,6 +185,9 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, ...@@ -185,6 +185,9 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
struct splice_pipe_desc *spd) struct splice_pipe_desc *spd)
{ {
unsigned int spd_pages = spd->nr_pages; unsigned int spd_pages = spd->nr_pages;
unsigned int tail = pipe->tail;
unsigned int head = pipe->head;
unsigned int mask = pipe->ring_size - 1;
int ret = 0, page_nr = 0; int ret = 0, page_nr = 0;
if (!spd_pages) if (!spd_pages)
...@@ -196,9 +199,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, ...@@ -196,9 +199,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
goto out; goto out;
} }
while (pipe->nrbufs < pipe->buffers) { while (!pipe_full(head, tail, pipe->max_usage)) {
int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); struct pipe_buffer *buf = &pipe->bufs[head & mask];
struct pipe_buffer *buf = pipe->bufs + newbuf;
buf->page = spd->pages[page_nr]; buf->page = spd->pages[page_nr];
buf->offset = spd->partial[page_nr].offset; buf->offset = spd->partial[page_nr].offset;
...@@ -207,7 +209,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, ...@@ -207,7 +209,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
buf->ops = spd->ops; buf->ops = spd->ops;
buf->flags = 0; buf->flags = 0;
pipe->nrbufs++; head++;
pipe->head = head;
page_nr++; page_nr++;
ret += buf->len; ret += buf->len;
...@@ -228,17 +231,19 @@ EXPORT_SYMBOL_GPL(splice_to_pipe); ...@@ -228,17 +231,19 @@ EXPORT_SYMBOL_GPL(splice_to_pipe);
ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{ {
unsigned int head = pipe->head;
unsigned int tail = pipe->tail;
unsigned int mask = pipe->ring_size - 1;
int ret; int ret;
if (unlikely(!pipe->readers)) { if (unlikely(!pipe->readers)) {
send_sig(SIGPIPE, current, 0); send_sig(SIGPIPE, current, 0);
ret = -EPIPE; ret = -EPIPE;
} else if (pipe->nrbufs == pipe->buffers) { } else if (pipe_full(head, tail, pipe->max_usage)) {
ret = -EAGAIN; ret = -EAGAIN;
} else { } else {
int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); pipe->bufs[head & mask] = *buf;
pipe->bufs[newbuf] = *buf; pipe->head = head + 1;
pipe->nrbufs++;
return buf->len; return buf->len;
} }
pipe_buf_release(pipe, buf); pipe_buf_release(pipe, buf);
...@@ -252,14 +257,14 @@ EXPORT_SYMBOL(add_to_pipe); ...@@ -252,14 +257,14 @@ EXPORT_SYMBOL(add_to_pipe);
*/ */
int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
{ {
unsigned int buffers = READ_ONCE(pipe->buffers); unsigned int max_usage = READ_ONCE(pipe->max_usage);
spd->nr_pages_max = buffers; spd->nr_pages_max = max_usage;
if (buffers <= PIPE_DEF_BUFFERS) if (max_usage <= PIPE_DEF_BUFFERS)
return 0; return 0;
spd->pages = kmalloc_array(buffers, sizeof(struct page *), GFP_KERNEL); spd->pages = kmalloc_array(max_usage, sizeof(struct page *), GFP_KERNEL);
spd->partial = kmalloc_array(buffers, sizeof(struct partial_page), spd->partial = kmalloc_array(max_usage, sizeof(struct partial_page),
GFP_KERNEL); GFP_KERNEL);
if (spd->pages && spd->partial) if (spd->pages && spd->partial)
...@@ -298,10 +303,11 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, ...@@ -298,10 +303,11 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
{ {
struct iov_iter to; struct iov_iter to;
struct kiocb kiocb; struct kiocb kiocb;
int idx, ret; unsigned int i_head;
int ret;
iov_iter_pipe(&to, READ, pipe, len); iov_iter_pipe(&to, READ, pipe, len);
idx = to.idx; i_head = to.head;
init_sync_kiocb(&kiocb, in); init_sync_kiocb(&kiocb, in);
kiocb.ki_pos = *ppos; kiocb.ki_pos = *ppos;
ret = call_read_iter(in, &kiocb, &to); ret = call_read_iter(in, &kiocb, &to);
...@@ -309,7 +315,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, ...@@ -309,7 +315,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
*ppos = kiocb.ki_pos; *ppos = kiocb.ki_pos;
file_accessed(in); file_accessed(in);
} else if (ret < 0) { } else if (ret < 0) {
to.idx = idx; to.head = i_head;
to.iov_offset = 0; to.iov_offset = 0;
iov_iter_advance(&to, 0); /* to free what was emitted */ iov_iter_advance(&to, 0); /* to free what was emitted */
/* /*
...@@ -370,11 +376,12 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, ...@@ -370,11 +376,12 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
struct iov_iter to; struct iov_iter to;
struct page **pages; struct page **pages;
unsigned int nr_pages; unsigned int nr_pages;
unsigned int mask;
size_t offset, base, copied = 0; size_t offset, base, copied = 0;
ssize_t res; ssize_t res;
int i; int i;
if (pipe->nrbufs == pipe->buffers) if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
return -EAGAIN; return -EAGAIN;
/* /*
...@@ -400,8 +407,9 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, ...@@ -400,8 +407,9 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
} }
} }
pipe->bufs[to.idx].offset = offset; mask = pipe->ring_size - 1;
pipe->bufs[to.idx].len -= offset; pipe->bufs[to.head & mask].offset = offset;
pipe->bufs[to.head & mask].len -= offset;
for (i = 0; i < nr_pages; i++) { for (i = 0; i < nr_pages; i++) {
size_t this_len = min_t(size_t, len, PAGE_SIZE - offset); size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
...@@ -443,7 +451,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, ...@@ -443,7 +451,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
if (sd->len < sd->total_len && pipe->nrbufs > 1) if (sd->len < sd->total_len &&
pipe_occupancy(pipe->head, pipe->tail) > 1)
more |= MSG_SENDPAGE_NOTLAST; more |= MSG_SENDPAGE_NOTLAST;
return file->f_op->sendpage(file, buf->page, buf->offset, return file->f_op->sendpage(file, buf->page, buf->offset,
...@@ -481,10 +490,13 @@ static void wakeup_pipe_writers(struct pipe_inode_info *pipe) ...@@ -481,10 +490,13 @@ static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
splice_actor *actor) splice_actor *actor)
{ {
unsigned int head = pipe->head;
unsigned int tail = pipe->tail;
unsigned int mask = pipe->ring_size - 1;
int ret; int ret;
while (pipe->nrbufs) { while (!pipe_empty(tail, head)) {
struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; struct pipe_buffer *buf = &pipe->bufs[tail & mask];
sd->len = buf->len; sd->len = buf->len;
if (sd->len > sd->total_len) if (sd->len > sd->total_len)
...@@ -511,8 +523,8 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des ...@@ -511,8 +523,8 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
if (!buf->len) { if (!buf->len) {
pipe_buf_release(pipe, buf); pipe_buf_release(pipe, buf);
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); tail++;
pipe->nrbufs--; pipe->tail = tail;
if (pipe->files) if (pipe->files)
sd->need_wakeup = true; sd->need_wakeup = true;
} }
...@@ -543,7 +555,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des ...@@ -543,7 +555,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des
if (signal_pending(current)) if (signal_pending(current))
return -ERESTARTSYS; return -ERESTARTSYS;
while (!pipe->nrbufs) { while (pipe_empty(pipe->head, pipe->tail)) {
if (!pipe->writers) if (!pipe->writers)
return 0; return 0;
...@@ -686,7 +698,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ...@@ -686,7 +698,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
.pos = *ppos, .pos = *ppos,
.u.file = out, .u.file = out,
}; };
int nbufs = pipe->buffers; int nbufs = pipe->max_usage;
struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec), struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
GFP_KERNEL); GFP_KERNEL);
ssize_t ret; ssize_t ret;
...@@ -699,16 +711,19 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ...@@ -699,16 +711,19 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
splice_from_pipe_begin(&sd); splice_from_pipe_begin(&sd);
while (sd.total_len) { while (sd.total_len) {
struct iov_iter from; struct iov_iter from;
unsigned int head = pipe->head;
unsigned int tail = pipe->tail;
unsigned int mask = pipe->ring_size - 1;
size_t left; size_t left;
int n, idx; int n;
ret = splice_from_pipe_next(pipe, &sd); ret = splice_from_pipe_next(pipe, &sd);
if (ret <= 0) if (ret <= 0)
break; break;
if (unlikely(nbufs < pipe->buffers)) { if (unlikely(nbufs < pipe->max_usage)) {
kfree(array); kfree(array);
nbufs = pipe->buffers; nbufs = pipe->max_usage;
array = kcalloc(nbufs, sizeof(struct bio_vec), array = kcalloc(nbufs, sizeof(struct bio_vec),
GFP_KERNEL); GFP_KERNEL);
if (!array) { if (!array) {
...@@ -719,16 +734,13 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ...@@ -719,16 +734,13 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
/* build the vector */ /* build the vector */
left = sd.total_len; left = sd.total_len;
for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) { for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++, n++) {
struct pipe_buffer *buf = pipe->bufs + idx; struct pipe_buffer *buf = &pipe->bufs[tail & mask];
size_t this_len = buf->len; size_t this_len = buf->len;
if (this_len > left) if (this_len > left)
this_len = left; this_len = left;
if (idx == pipe->buffers - 1)
idx = -1;
ret = pipe_buf_confirm(pipe, buf); ret = pipe_buf_confirm(pipe, buf);
if (unlikely(ret)) { if (unlikely(ret)) {
if (ret == -ENODATA) if (ret == -ENODATA)
...@@ -752,14 +764,15 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ...@@ -752,14 +764,15 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
*ppos = sd.pos; *ppos = sd.pos;
/* dismiss the fully eaten buffers, adjust the partial one */ /* dismiss the fully eaten buffers, adjust the partial one */
tail = pipe->tail;
while (ret) { while (ret) {
struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; struct pipe_buffer *buf = &pipe->bufs[tail & mask];
if (ret >= buf->len) { if (ret >= buf->len) {
ret -= buf->len; ret -= buf->len;
buf->len = 0; buf->len = 0;
pipe_buf_release(pipe, buf); pipe_buf_release(pipe, buf);
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); tail++;
pipe->nrbufs--; pipe->tail = tail;
if (pipe->files) if (pipe->files)
sd.need_wakeup = true; sd.need_wakeup = true;
} else { } else {
...@@ -942,16 +955,17 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, ...@@ -942,16 +955,17 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
sd->flags &= ~SPLICE_F_NONBLOCK; sd->flags &= ~SPLICE_F_NONBLOCK;
more = sd->flags & SPLICE_F_MORE; more = sd->flags & SPLICE_F_MORE;
WARN_ON_ONCE(pipe->nrbufs != 0); WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail));
while (len) { while (len) {
unsigned int pipe_pages; unsigned int p_space;
size_t read_len; size_t read_len;
loff_t pos = sd->pos, prev_pos = pos; loff_t pos = sd->pos, prev_pos = pos;
/* Don't try to read more the pipe has space for. */ /* Don't try to read more the pipe has space for. */
pipe_pages = pipe->buffers - pipe->nrbufs; p_space = pipe->max_usage -
read_len = min(len, (size_t)pipe_pages << PAGE_SHIFT); pipe_occupancy(pipe->head, pipe->tail);
read_len = min_t(size_t, len, p_space << PAGE_SHIFT);
ret = do_splice_to(in, &pos, pipe, read_len, flags); ret = do_splice_to(in, &pos, pipe, read_len, flags);
if (unlikely(ret <= 0)) if (unlikely(ret <= 0))
goto out_release; goto out_release;
...@@ -990,7 +1004,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, ...@@ -990,7 +1004,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
} }
done: done:
pipe->nrbufs = pipe->curbuf = 0; pipe->tail = pipe->head = 0;
file_accessed(in); file_accessed(in);
return bytes; return bytes;
...@@ -999,8 +1013,8 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, ...@@ -999,8 +1013,8 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
* If we did an incomplete transfer we must release * If we did an incomplete transfer we must release
* the pipe buffers in question: * the pipe buffers in question:
*/ */
for (i = 0; i < pipe->buffers; i++) { for (i = 0; i < pipe->ring_size; i++) {
struct pipe_buffer *buf = pipe->bufs + i; struct pipe_buffer *buf = &pipe->bufs[i];
if (buf->ops) if (buf->ops)
pipe_buf_release(pipe, buf); pipe_buf_release(pipe, buf);
...@@ -1076,7 +1090,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) ...@@ -1076,7 +1090,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
send_sig(SIGPIPE, current, 0); send_sig(SIGPIPE, current, 0);
return -EPIPE; return -EPIPE;
} }
if (pipe->nrbufs != pipe->buffers) if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
return 0; return 0;
if (flags & SPLICE_F_NONBLOCK) if (flags & SPLICE_F_NONBLOCK)
return -EAGAIN; return -EAGAIN;
...@@ -1182,11 +1196,11 @@ static long do_splice(struct file *in, loff_t __user *off_in, ...@@ -1182,11 +1196,11 @@ static long do_splice(struct file *in, loff_t __user *off_in,
pipe_lock(opipe); pipe_lock(opipe);
ret = wait_for_space(opipe, flags); ret = wait_for_space(opipe, flags);
if (!ret) { if (!ret) {
unsigned int pipe_pages; unsigned int p_space;
/* Don't try to read more the pipe has space for. */ /* Don't try to read more the pipe has space for. */
pipe_pages = opipe->buffers - opipe->nrbufs; p_space = opipe->max_usage - pipe_occupancy(opipe->head, opipe->tail);
len = min(len, (size_t)pipe_pages << PAGE_SHIFT); len = min_t(size_t, len, p_space << PAGE_SHIFT);
ret = do_splice_to(in, &offset, opipe, len, flags); ret = do_splice_to(in, &offset, opipe, len, flags);
} }
...@@ -1450,16 +1464,16 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) ...@@ -1450,16 +1464,16 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
int ret; int ret;
/* /*
* Check ->nrbufs without the inode lock first. This function * Check the pipe occupancy without the inode lock first. This function
* is speculative anyways, so missing one is ok. * is speculative anyways, so missing one is ok.
*/ */
if (pipe->nrbufs) if (!pipe_empty(pipe->head, pipe->tail))
return 0; return 0;
ret = 0; ret = 0;
pipe_lock(pipe); pipe_lock(pipe);
while (!pipe->nrbufs) { while (pipe_empty(pipe->head, pipe->tail)) {
if (signal_pending(current)) { if (signal_pending(current)) {
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
break; break;
...@@ -1488,16 +1502,16 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) ...@@ -1488,16 +1502,16 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
int ret; int ret;
/* /*
* Check ->nrbufs without the inode lock first. This function * Check pipe occupancy without the inode lock first. This function
* is speculative anyways, so missing one is ok. * is speculative anyways, so missing one is ok.
*/ */
if (pipe->nrbufs < pipe->buffers) if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
return 0; return 0;
ret = 0; ret = 0;
pipe_lock(pipe); pipe_lock(pipe);
while (pipe->nrbufs >= pipe->buffers) { while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
if (!pipe->readers) { if (!pipe->readers) {
send_sig(SIGPIPE, current, 0); send_sig(SIGPIPE, current, 0);
ret = -EPIPE; ret = -EPIPE;
...@@ -1528,7 +1542,10 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, ...@@ -1528,7 +1542,10 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
size_t len, unsigned int flags) size_t len, unsigned int flags)
{ {
struct pipe_buffer *ibuf, *obuf; struct pipe_buffer *ibuf, *obuf;
int ret = 0, nbuf; unsigned int i_head, o_head;
unsigned int i_tail, o_tail;
unsigned int i_mask, o_mask;
int ret = 0;
bool input_wakeup = false; bool input_wakeup = false;
...@@ -1548,7 +1565,14 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, ...@@ -1548,7 +1565,14 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
*/ */
pipe_double_lock(ipipe, opipe); pipe_double_lock(ipipe, opipe);
i_tail = ipipe->tail;
i_mask = ipipe->ring_size - 1;
o_head = opipe->head;
o_mask = opipe->ring_size - 1;
do { do {
size_t o_len;
if (!opipe->readers) { if (!opipe->readers) {
send_sig(SIGPIPE, current, 0); send_sig(SIGPIPE, current, 0);
if (!ret) if (!ret)
...@@ -1556,14 +1580,18 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, ...@@ -1556,14 +1580,18 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
break; break;
} }
if (!ipipe->nrbufs && !ipipe->writers) i_head = ipipe->head;
o_tail = opipe->tail;
if (pipe_empty(i_head, i_tail) && !ipipe->writers)
break; break;
/* /*
* Cannot make any progress, because either the input * Cannot make any progress, because either the input
* pipe is empty or the output pipe is full. * pipe is empty or the output pipe is full.
*/ */
if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) { if (pipe_empty(i_head, i_tail) ||
pipe_full(o_head, o_tail, opipe->max_usage)) {
/* Already processed some buffers, break */ /* Already processed some buffers, break */
if (ret) if (ret)
break; break;
...@@ -1583,9 +1611,8 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, ...@@ -1583,9 +1611,8 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
goto retry; goto retry;
} }
ibuf = ipipe->bufs + ipipe->curbuf; ibuf = &ipipe->bufs[i_tail & i_mask];
nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); obuf = &opipe->bufs[o_head & o_mask];
obuf = opipe->bufs + nbuf;
if (len >= ibuf->len) { if (len >= ibuf->len) {
/* /*
...@@ -1593,10 +1620,12 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, ...@@ -1593,10 +1620,12 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
*/ */
*obuf = *ibuf; *obuf = *ibuf;
ibuf->ops = NULL; ibuf->ops = NULL;
opipe->nrbufs++; i_tail++;
ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1); ipipe->tail = i_tail;
ipipe->nrbufs--;
input_wakeup = true; input_wakeup = true;
o_len = obuf->len;
o_head++;
opipe->head = o_head;
} else { } else {
/* /*
* Get a reference to this pipe buffer, * Get a reference to this pipe buffer,
...@@ -1618,12 +1647,14 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, ...@@ -1618,12 +1647,14 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
pipe_buf_mark_unmergeable(obuf); pipe_buf_mark_unmergeable(obuf);
obuf->len = len; obuf->len = len;
opipe->nrbufs++; ibuf->offset += len;
ibuf->offset += obuf->len; ibuf->len -= len;
ibuf->len -= obuf->len; o_len = len;
o_head++;
opipe->head = o_head;
} }
ret += obuf->len; ret += o_len;
len -= obuf->len; len -= o_len;
} while (len); } while (len);
pipe_unlock(ipipe); pipe_unlock(ipipe);
...@@ -1649,7 +1680,10 @@ static int link_pipe(struct pipe_inode_info *ipipe, ...@@ -1649,7 +1680,10 @@ static int link_pipe(struct pipe_inode_info *ipipe,
size_t len, unsigned int flags) size_t len, unsigned int flags)
{ {
struct pipe_buffer *ibuf, *obuf; struct pipe_buffer *ibuf, *obuf;
int ret = 0, i = 0, nbuf; unsigned int i_head, o_head;
unsigned int i_tail, o_tail;
unsigned int i_mask, o_mask;
int ret = 0;
/* /*
* Potential ABBA deadlock, work around it by ordering lock * Potential ABBA deadlock, work around it by ordering lock
...@@ -1658,6 +1692,11 @@ static int link_pipe(struct pipe_inode_info *ipipe, ...@@ -1658,6 +1692,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
*/ */
pipe_double_lock(ipipe, opipe); pipe_double_lock(ipipe, opipe);
i_tail = ipipe->tail;
i_mask = ipipe->ring_size - 1;
o_head = opipe->head;
o_mask = opipe->ring_size - 1;
do { do {
if (!opipe->readers) { if (!opipe->readers) {
send_sig(SIGPIPE, current, 0); send_sig(SIGPIPE, current, 0);
...@@ -1666,15 +1705,19 @@ static int link_pipe(struct pipe_inode_info *ipipe, ...@@ -1666,15 +1705,19 @@ static int link_pipe(struct pipe_inode_info *ipipe,
break; break;
} }
i_head = ipipe->head;
o_tail = opipe->tail;
/* /*
* If we have iterated all input buffers or ran out of * If we have iterated all input buffers or run out of
* output room, break. * output room, break.
*/ */
if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) if (pipe_empty(i_head, i_tail) ||
pipe_full(o_head, o_tail, opipe->max_usage))
break; break;
ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1)); ibuf = &ipipe->bufs[i_tail & i_mask];
nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); obuf = &opipe->bufs[o_head & o_mask];
/* /*
* Get a reference to this pipe buffer, * Get a reference to this pipe buffer,
...@@ -1686,7 +1729,6 @@ static int link_pipe(struct pipe_inode_info *ipipe, ...@@ -1686,7 +1729,6 @@ static int link_pipe(struct pipe_inode_info *ipipe,
break; break;
} }
obuf = opipe->bufs + nbuf;
*obuf = *ibuf; *obuf = *ibuf;
/* /*
...@@ -1699,11 +1741,12 @@ static int link_pipe(struct pipe_inode_info *ipipe, ...@@ -1699,11 +1741,12 @@ static int link_pipe(struct pipe_inode_info *ipipe,
if (obuf->len > len) if (obuf->len > len)
obuf->len = len; obuf->len = len;
opipe->nrbufs++;
ret += obuf->len; ret += obuf->len;
len -= obuf->len; len -= obuf->len;
i++;
o_head++;
opipe->head = o_head;
i_tail++;
} while (len); } while (len);
/* /*
......
...@@ -30,9 +30,10 @@ struct pipe_buffer { ...@@ -30,9 +30,10 @@ struct pipe_buffer {
* struct pipe_inode_info - a linux kernel pipe * struct pipe_inode_info - a linux kernel pipe
* @mutex: mutex protecting the whole thing * @mutex: mutex protecting the whole thing
* @wait: reader/writer wait point in case of empty/full pipe * @wait: reader/writer wait point in case of empty/full pipe
* @nrbufs: the number of non-empty pipe buffers in this pipe * @head: The point of buffer production
* @buffers: total number of buffers (should be a power of 2) * @tail: The point of buffer consumption
* @curbuf: the current pipe buffer entry * @max_usage: The maximum number of slots that may be used in the ring
* @ring_size: total number of buffers (should be a power of 2)
* @tmp_page: cached released page * @tmp_page: cached released page
* @readers: number of current readers of this pipe * @readers: number of current readers of this pipe
* @writers: number of current writers of this pipe * @writers: number of current writers of this pipe
...@@ -48,7 +49,10 @@ struct pipe_buffer { ...@@ -48,7 +49,10 @@ struct pipe_buffer {
struct pipe_inode_info { struct pipe_inode_info {
struct mutex mutex; struct mutex mutex;
wait_queue_head_t wait; wait_queue_head_t wait;
unsigned int nrbufs, curbuf, buffers; unsigned int head;
unsigned int tail;
unsigned int max_usage;
unsigned int ring_size;
unsigned int readers; unsigned int readers;
unsigned int writers; unsigned int writers;
unsigned int files; unsigned int files;
...@@ -104,6 +108,58 @@ struct pipe_buf_operations { ...@@ -104,6 +108,58 @@ struct pipe_buf_operations {
bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); bool (*get)(struct pipe_inode_info *, struct pipe_buffer *);
}; };
/**
 * pipe_empty - Test whether the pipe ring currently holds no buffers
 * @head: The ring index at which buffers are produced
 * @tail: The ring index at which buffers are consumed
 *
 * Return: true when both indices are equal, false otherwise.
 */
static inline bool pipe_empty(unsigned int head, unsigned int tail)
{
	/* Nothing lies between the consumer and the producer index. */
	const bool nothing_queued = (tail == head);

	return nothing_queued;
}
/**
 * pipe_occupancy - Count the ring slots lying between tail and head
 * @head: The ring index at which buffers are produced
 * @tail: The ring index at which buffers are consumed
 *
 * The subtraction is carried out in unsigned arithmetic, so the result
 * is the distance from @tail to @head modulo the range of unsigned int
 * even after @head has wrapped past zero.
 *
 * Return: @head minus @tail.
 */
static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail)
{
	unsigned int in_use = head;

	in_use -= tail;
	return in_use;
}
/**
 * pipe_full - Test whether the pipe has reached its slot limit
 * @head: The ring index at which buffers are produced
 * @tail: The ring index at which buffers are consumed
 * @limit: The largest number of slots that may be in use
 *
 * Return: true when the occupancy computed from @head and @tail is
 * at least @limit, false while it is still below @limit.
 */
static inline bool pipe_full(unsigned int head, unsigned int tail,
			     unsigned int limit)
{
	const unsigned int in_use = pipe_occupancy(head, tail);

	if (in_use < limit)
		return false;
	return true;
}
/**
 * pipe_space_for_user - Compute how many ring slots userspace may still fill
 * @head: The ring index at which buffers are produced
 * @tail: The ring index at which buffers are consumed
 * @pipe: The pipe whose max_usage and ring_size bound the answer
 *
 * Return: 0 once the occupancy has reached @pipe->max_usage; otherwise
 * the number of unoccupied ring slots (@pipe->ring_size minus the
 * occupancy), capped at @pipe->max_usage.
 */
static inline unsigned int pipe_space_for_user(unsigned int head, unsigned int tail,
					       struct pipe_inode_info *pipe)
{
	const unsigned int in_use = pipe_occupancy(head, tail);
	unsigned int unused;

	/* Already at (or beyond) the usage ceiling: nothing left to hand out. */
	if (in_use >= pipe->max_usage)
		return 0;

	/* Free slots in the ring, never reporting more than the ceiling. */
	unused = pipe->ring_size - in_use;
	return unused > pipe->max_usage ? pipe->max_usage : unused;
}
/** /**
* pipe_buf_get - get a reference to a pipe_buffer * pipe_buf_get - get a reference to a pipe_buffer
* @pipe: the pipe that the buffer belongs to * @pipe: the pipe that the buffer belongs to
......
...@@ -45,8 +45,8 @@ struct iov_iter { ...@@ -45,8 +45,8 @@ struct iov_iter {
union { union {
unsigned long nr_segs; unsigned long nr_segs;
struct { struct {
int idx; unsigned int head;
int start_idx; unsigned int start_head;
}; };
}; };
}; };
......
...@@ -201,9 +201,10 @@ void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void ...@@ -201,9 +201,10 @@ void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void
void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
unsigned int mode, void *key, wait_queue_entry_t *bookmark); unsigned int mode, void *key, wait_queue_entry_t *bookmark);
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode);
#define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL)
#define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL)
...@@ -214,7 +215,7 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); ...@@ -214,7 +215,7 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL) #define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
#define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1) #define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE)
/* /*
* Wakeup macros to be used to report events to the targets. * Wakeup macros to be used to report events to the targets.
...@@ -228,7 +229,9 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); ...@@ -228,7 +229,9 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
#define wake_up_interruptible_poll(x, m) \ #define wake_up_interruptible_poll(x, m) \
__wake_up(x, TASK_INTERRUPTIBLE, 1, poll_to_key(m)) __wake_up(x, TASK_INTERRUPTIBLE, 1, poll_to_key(m))
#define wake_up_interruptible_sync_poll(x, m) \ #define wake_up_interruptible_sync_poll(x, m) \
__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, poll_to_key(m)) __wake_up_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m))
#define wake_up_interruptible_sync_poll_locked(x, m) \
__wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m))
#define ___wait_cond_timeout(condition) \ #define ___wait_cond_timeout(condition) \
({ \ ({ \
......
...@@ -1409,7 +1409,7 @@ static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode, ...@@ -1409,7 +1409,7 @@ static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode,
void __wake_up_parent(struct task_struct *p, struct task_struct *parent) void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
{ {
__wake_up_sync_key(&parent->signal->wait_chldexit, __wake_up_sync_key(&parent->signal->wait_chldexit,
TASK_INTERRUPTIBLE, 1, p); TASK_INTERRUPTIBLE, p);
} }
static long do_wait(struct wait_opts *wo) static long do_wait(struct wait_opts *wo)
......
...@@ -169,7 +169,6 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark); ...@@ -169,7 +169,6 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
* __wake_up_sync_key - wake up threads blocked on a waitqueue. * __wake_up_sync_key - wake up threads blocked on a waitqueue.
* @wq_head: the waitqueue * @wq_head: the waitqueue
* @mode: which threads * @mode: which threads
* @nr_exclusive: how many wake-one or wake-many threads to wake up
* @key: opaque value to be passed to wakeup targets * @key: opaque value to be passed to wakeup targets
* *
* The sync wakeup differs in that the waker knows that it will schedule
...@@ -183,26 +182,44 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark); ...@@ -183,26 +182,44 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
* accessing the task state. * accessing the task state.
*/ */
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, void *key) void *key)
{ {
int wake_flags = 1; /* XXX WF_SYNC */
if (unlikely(!wq_head)) if (unlikely(!wq_head))
return; return;
if (unlikely(nr_exclusive != 1)) __wake_up_common_lock(wq_head, mode, 1, WF_SYNC, key);
wake_flags = 0;
__wake_up_common_lock(wq_head, mode, nr_exclusive, wake_flags, key);
} }
EXPORT_SYMBOL_GPL(__wake_up_sync_key); EXPORT_SYMBOL_GPL(__wake_up_sync_key);
/**
 * __wake_up_locked_sync_key - wake up a thread blocked on a locked waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 *
 * This variant forwards straight to __wake_up_common() with a count of 1,
 * the WF_SYNC wake flag and a NULL final argument; it performs no NULL
 * check on @wq_head and takes no lock of its own.
 *
 * NOTE(review): as the name suggests, this is presumably meant for callers
 * that already hold the waitqueue's spinlock - confirm against the callers.
 */
void __wake_up_locked_sync_key(struct wait_queue_head *wq_head,
unsigned int mode, void *key)
{
__wake_up_common(wq_head, mode, 1, WF_SYNC, key, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key);
/* /*
* __wake_up_sync - see __wake_up_sync_key() * __wake_up_sync - see __wake_up_sync_key()
*/ */
void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive) void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode)
{ {
__wake_up_sync_key(wq_head, mode, nr_exclusive, NULL); __wake_up_sync_key(wq_head, mode, NULL);
} }
EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
......
...@@ -325,28 +325,33 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t ...@@ -325,28 +325,33 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
static bool sanity(const struct iov_iter *i) static bool sanity(const struct iov_iter *i)
{ {
struct pipe_inode_info *pipe = i->pipe; struct pipe_inode_info *pipe = i->pipe;
int idx = i->idx; unsigned int p_head = pipe->head;
int next = pipe->curbuf + pipe->nrbufs; unsigned int p_tail = pipe->tail;
unsigned int p_mask = pipe->ring_size - 1;
unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
unsigned int i_head = i->head;
unsigned int idx;
if (i->iov_offset) { if (i->iov_offset) {
struct pipe_buffer *p; struct pipe_buffer *p;
if (unlikely(!pipe->nrbufs)) if (unlikely(p_occupancy == 0))
goto Bad; // pipe must be non-empty goto Bad; // pipe must be non-empty
if (unlikely(idx != ((next - 1) & (pipe->buffers - 1)))) if (unlikely(i_head != p_head - 1))
goto Bad; // must be at the last buffer... goto Bad; // must be at the last buffer...
p = &pipe->bufs[idx]; p = &pipe->bufs[i_head & p_mask];
if (unlikely(p->offset + p->len != i->iov_offset)) if (unlikely(p->offset + p->len != i->iov_offset))
goto Bad; // ... at the end of segment goto Bad; // ... at the end of segment
} else { } else {
if (idx != (next & (pipe->buffers - 1))) if (i_head != p_head)
goto Bad; // must be right after the last buffer goto Bad; // must be right after the last buffer
} }
return true; return true;
Bad: Bad:
printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset); printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n", printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
pipe->curbuf, pipe->nrbufs, pipe->buffers); p_head, p_tail, pipe->ring_size);
for (idx = 0; idx < pipe->buffers; idx++) for (idx = 0; idx < pipe->ring_size; idx++)
printk(KERN_ERR "[%p %p %d %d]\n", printk(KERN_ERR "[%p %p %d %d]\n",
pipe->bufs[idx].ops, pipe->bufs[idx].ops,
pipe->bufs[idx].page, pipe->bufs[idx].page,
...@@ -359,18 +364,15 @@ static bool sanity(const struct iov_iter *i) ...@@ -359,18 +364,15 @@ static bool sanity(const struct iov_iter *i)
#define sanity(i) true #define sanity(i) true
#endif #endif
static inline int next_idx(int idx, struct pipe_inode_info *pipe)
{
return (idx + 1) & (pipe->buffers - 1);
}
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i) struct iov_iter *i)
{ {
struct pipe_inode_info *pipe = i->pipe; struct pipe_inode_info *pipe = i->pipe;
struct pipe_buffer *buf; struct pipe_buffer *buf;
unsigned int p_tail = pipe->tail;
unsigned int p_mask = pipe->ring_size - 1;
unsigned int i_head = i->head;
size_t off; size_t off;
int idx;
if (unlikely(bytes > i->count)) if (unlikely(bytes > i->count))
bytes = i->count; bytes = i->count;
...@@ -382,8 +384,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by ...@@ -382,8 +384,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
return 0; return 0;
off = i->iov_offset; off = i->iov_offset;
idx = i->idx; buf = &pipe->bufs[i_head & p_mask];
buf = &pipe->bufs[idx];
if (off) { if (off) {
if (offset == off && buf->page == page) { if (offset == off && buf->page == page) {
/* merge with the last one */ /* merge with the last one */
...@@ -391,18 +392,21 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by ...@@ -391,18 +392,21 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
i->iov_offset += bytes; i->iov_offset += bytes;
goto out; goto out;
} }
idx = next_idx(idx, pipe); i_head++;
buf = &pipe->bufs[idx]; buf = &pipe->bufs[i_head & p_mask];
} }
if (idx == pipe->curbuf && pipe->nrbufs) if (pipe_full(i_head, p_tail, pipe->max_usage))
return 0; return 0;
pipe->nrbufs++;
buf->ops = &page_cache_pipe_buf_ops; buf->ops = &page_cache_pipe_buf_ops;
get_page(buf->page = page); get_page(page);
buf->page = page;
buf->offset = offset; buf->offset = offset;
buf->len = bytes; buf->len = bytes;
pipe->head = i_head + 1;
i->iov_offset = offset + bytes; i->iov_offset = offset + bytes;
i->idx = idx; i->head = i_head;
out: out:
i->count -= bytes; i->count -= bytes;
return bytes; return bytes;
...@@ -480,24 +484,30 @@ static inline bool allocated(struct pipe_buffer *buf) ...@@ -480,24 +484,30 @@ static inline bool allocated(struct pipe_buffer *buf)
return buf->ops == &default_pipe_buf_ops; return buf->ops == &default_pipe_buf_ops;
} }
static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp) static inline void data_start(const struct iov_iter *i,
unsigned int *iter_headp, size_t *offp)
{ {
unsigned int p_mask = i->pipe->ring_size - 1;
unsigned int iter_head = i->head;
size_t off = i->iov_offset; size_t off = i->iov_offset;
int idx = i->idx;
if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) { if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
idx = next_idx(idx, i->pipe); off == PAGE_SIZE)) {
iter_head++;
off = 0; off = 0;
} }
*idxp = idx; *iter_headp = iter_head;
*offp = off; *offp = off;
} }
static size_t push_pipe(struct iov_iter *i, size_t size, static size_t push_pipe(struct iov_iter *i, size_t size,
int *idxp, size_t *offp) int *iter_headp, size_t *offp)
{ {
struct pipe_inode_info *pipe = i->pipe; struct pipe_inode_info *pipe = i->pipe;
unsigned int p_tail = pipe->tail;
unsigned int p_mask = pipe->ring_size - 1;
unsigned int iter_head;
size_t off; size_t off;
int idx;
ssize_t left; ssize_t left;
if (unlikely(size > i->count)) if (unlikely(size > i->count))
...@@ -506,33 +516,34 @@ static size_t push_pipe(struct iov_iter *i, size_t size, ...@@ -506,33 +516,34 @@ static size_t push_pipe(struct iov_iter *i, size_t size,
return 0; return 0;
left = size; left = size;
data_start(i, &idx, &off); data_start(i, &iter_head, &off);
*idxp = idx; *iter_headp = iter_head;
*offp = off; *offp = off;
if (off) { if (off) {
left -= PAGE_SIZE - off; left -= PAGE_SIZE - off;
if (left <= 0) { if (left <= 0) {
pipe->bufs[idx].len += size; pipe->bufs[iter_head & p_mask].len += size;
return size; return size;
} }
pipe->bufs[idx].len = PAGE_SIZE; pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
idx = next_idx(idx, pipe); iter_head++;
} }
while (idx != pipe->curbuf || !pipe->nrbufs) { while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
struct page *page = alloc_page(GFP_USER); struct page *page = alloc_page(GFP_USER);
if (!page) if (!page)
break; break;
pipe->nrbufs++;
pipe->bufs[idx].ops = &default_pipe_buf_ops; buf->ops = &default_pipe_buf_ops;
pipe->bufs[idx].page = page; buf->page = page;
pipe->bufs[idx].offset = 0; buf->offset = 0;
if (left <= PAGE_SIZE) { buf->len = min_t(ssize_t, left, PAGE_SIZE);
pipe->bufs[idx].len = left; left -= buf->len;
iter_head++;
pipe->head = iter_head;
if (left == 0)
return size; return size;
}
pipe->bufs[idx].len = PAGE_SIZE;
left -= PAGE_SIZE;
idx = next_idx(idx, pipe);
} }
return size - left; return size - left;
} }
...@@ -541,23 +552,26 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes, ...@@ -541,23 +552,26 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
struct iov_iter *i) struct iov_iter *i)
{ {
struct pipe_inode_info *pipe = i->pipe; struct pipe_inode_info *pipe = i->pipe;
unsigned int p_mask = pipe->ring_size - 1;
unsigned int i_head;
size_t n, off; size_t n, off;
int idx;
if (!sanity(i)) if (!sanity(i))
return 0; return 0;
bytes = n = push_pipe(i, bytes, &idx, &off); bytes = n = push_pipe(i, bytes, &i_head, &off);
if (unlikely(!n)) if (unlikely(!n))
return 0; return 0;
for ( ; n; idx = next_idx(idx, pipe), off = 0) { do {
size_t chunk = min_t(size_t, n, PAGE_SIZE - off); size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk); memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
i->idx = idx; i->head = i_head;
i->iov_offset = off + chunk; i->iov_offset = off + chunk;
n -= chunk; n -= chunk;
addr += chunk; addr += chunk;
} off = 0;
i_head++;
} while (n);
i->count -= bytes; i->count -= bytes;
return bytes; return bytes;
} }
...@@ -573,28 +587,31 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, ...@@ -573,28 +587,31 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
__wsum *csum, struct iov_iter *i) __wsum *csum, struct iov_iter *i)
{ {
struct pipe_inode_info *pipe = i->pipe; struct pipe_inode_info *pipe = i->pipe;
unsigned int p_mask = pipe->ring_size - 1;
unsigned int i_head;
size_t n, r; size_t n, r;
size_t off = 0; size_t off = 0;
__wsum sum = *csum; __wsum sum = *csum;
int idx;
if (!sanity(i)) if (!sanity(i))
return 0; return 0;
bytes = n = push_pipe(i, bytes, &idx, &r); bytes = n = push_pipe(i, bytes, &i_head, &r);
if (unlikely(!n)) if (unlikely(!n))
return 0; return 0;
for ( ; n; idx = next_idx(idx, pipe), r = 0) { do {
size_t chunk = min_t(size_t, n, PAGE_SIZE - r); size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
char *p = kmap_atomic(pipe->bufs[idx].page); char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
sum = csum_and_memcpy(p + r, addr, chunk, sum, off); sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
kunmap_atomic(p); kunmap_atomic(p);
i->idx = idx; i->head = i_head;
i->iov_offset = r + chunk; i->iov_offset = r + chunk;
n -= chunk; n -= chunk;
off += chunk; off += chunk;
addr += chunk; addr += chunk;
} r = 0;
i_head++;
} while (n);
i->count -= bytes; i->count -= bytes;
*csum = sum; *csum = sum;
return bytes; return bytes;
...@@ -645,29 +662,32 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, ...@@ -645,29 +662,32 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
struct iov_iter *i) struct iov_iter *i)
{ {
struct pipe_inode_info *pipe = i->pipe; struct pipe_inode_info *pipe = i->pipe;
unsigned int p_mask = pipe->ring_size - 1;
unsigned int i_head;
size_t n, off, xfer = 0; size_t n, off, xfer = 0;
int idx;
if (!sanity(i)) if (!sanity(i))
return 0; return 0;
bytes = n = push_pipe(i, bytes, &idx, &off); bytes = n = push_pipe(i, bytes, &i_head, &off);
if (unlikely(!n)) if (unlikely(!n))
return 0; return 0;
for ( ; n; idx = next_idx(idx, pipe), off = 0) { do {
size_t chunk = min_t(size_t, n, PAGE_SIZE - off); size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
unsigned long rem; unsigned long rem;
rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr, rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
chunk); off, addr, chunk);
i->idx = idx; i->head = i_head;
i->iov_offset = off + chunk - rem; i->iov_offset = off + chunk - rem;
xfer += chunk - rem; xfer += chunk - rem;
if (rem) if (rem)
break; break;
n -= chunk; n -= chunk;
addr += chunk; addr += chunk;
} off = 0;
i_head++;
} while (n);
i->count -= xfer; i->count -= xfer;
return xfer; return xfer;
} }
...@@ -925,23 +945,26 @@ EXPORT_SYMBOL(copy_page_from_iter); ...@@ -925,23 +945,26 @@ EXPORT_SYMBOL(copy_page_from_iter);
static size_t pipe_zero(size_t bytes, struct iov_iter *i) static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{ {
struct pipe_inode_info *pipe = i->pipe; struct pipe_inode_info *pipe = i->pipe;
unsigned int p_mask = pipe->ring_size - 1;
unsigned int i_head;
size_t n, off; size_t n, off;
int idx;
if (!sanity(i)) if (!sanity(i))
return 0; return 0;
bytes = n = push_pipe(i, bytes, &idx, &off); bytes = n = push_pipe(i, bytes, &i_head, &off);
if (unlikely(!n)) if (unlikely(!n))
return 0; return 0;
for ( ; n; idx = next_idx(idx, pipe), off = 0) { do {
size_t chunk = min_t(size_t, n, PAGE_SIZE - off); size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
memzero_page(pipe->bufs[idx].page, off, chunk); memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
i->idx = idx; i->head = i_head;
i->iov_offset = off + chunk; i->iov_offset = off + chunk;
n -= chunk; n -= chunk;
} off = 0;
i_head++;
} while (n);
i->count -= bytes; i->count -= bytes;
return bytes; return bytes;
} }
...@@ -987,20 +1010,26 @@ EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); ...@@ -987,20 +1010,26 @@ EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
static inline void pipe_truncate(struct iov_iter *i) static inline void pipe_truncate(struct iov_iter *i)
{ {
struct pipe_inode_info *pipe = i->pipe; struct pipe_inode_info *pipe = i->pipe;
if (pipe->nrbufs) { unsigned int p_tail = pipe->tail;
unsigned int p_head = pipe->head;
unsigned int p_mask = pipe->ring_size - 1;
if (!pipe_empty(p_head, p_tail)) {
struct pipe_buffer *buf;
unsigned int i_head = i->head;
size_t off = i->iov_offset; size_t off = i->iov_offset;
int idx = i->idx;
int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
if (off) { if (off) {
pipe->bufs[idx].len = off - pipe->bufs[idx].offset; buf = &pipe->bufs[i_head & p_mask];
idx = next_idx(idx, pipe); buf->len = off - buf->offset;
nrbufs++; i_head++;
} }
while (pipe->nrbufs > nrbufs) { while (p_head != i_head) {
pipe_buf_release(pipe, &pipe->bufs[idx]); p_head--;
idx = next_idx(idx, pipe); pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
pipe->nrbufs--;
} }
pipe->head = p_head;
} }
} }
...@@ -1011,18 +1040,20 @@ static void pipe_advance(struct iov_iter *i, size_t size) ...@@ -1011,18 +1040,20 @@ static void pipe_advance(struct iov_iter *i, size_t size)
size = i->count; size = i->count;
if (size) { if (size) {
struct pipe_buffer *buf; struct pipe_buffer *buf;
unsigned int p_mask = pipe->ring_size - 1;
unsigned int i_head = i->head;
size_t off = i->iov_offset, left = size; size_t off = i->iov_offset, left = size;
int idx = i->idx;
if (off) /* make it relative to the beginning of buffer */ if (off) /* make it relative to the beginning of buffer */
left += off - pipe->bufs[idx].offset; left += off - pipe->bufs[i_head & p_mask].offset;
while (1) { while (1) {
buf = &pipe->bufs[idx]; buf = &pipe->bufs[i_head & p_mask];
if (left <= buf->len) if (left <= buf->len)
break; break;
left -= buf->len; left -= buf->len;
idx = next_idx(idx, pipe); i_head++;
} }
i->idx = idx; i->head = i_head;
i->iov_offset = buf->offset + left; i->iov_offset = buf->offset + left;
} }
i->count -= size; i->count -= size;
...@@ -1053,25 +1084,27 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) ...@@ -1053,25 +1084,27 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
i->count += unroll; i->count += unroll;
if (unlikely(iov_iter_is_pipe(i))) { if (unlikely(iov_iter_is_pipe(i))) {
struct pipe_inode_info *pipe = i->pipe; struct pipe_inode_info *pipe = i->pipe;
int idx = i->idx; unsigned int p_mask = pipe->ring_size - 1;
unsigned int i_head = i->head;
size_t off = i->iov_offset; size_t off = i->iov_offset;
while (1) { while (1) {
size_t n = off - pipe->bufs[idx].offset; struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
size_t n = off - b->offset;
if (unroll < n) { if (unroll < n) {
off -= unroll; off -= unroll;
break; break;
} }
unroll -= n; unroll -= n;
if (!unroll && idx == i->start_idx) { if (!unroll && i_head == i->start_head) {
off = 0; off = 0;
break; break;
} }
if (!idx--) i_head--;
idx = pipe->buffers - 1; b = &pipe->bufs[i_head & p_mask];
off = pipe->bufs[idx].offset + pipe->bufs[idx].len; off = b->offset + b->len;
} }
i->iov_offset = off; i->iov_offset = off;
i->idx = idx; i->head = i_head;
pipe_truncate(i); pipe_truncate(i);
return; return;
} }
...@@ -1159,13 +1192,13 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, ...@@ -1159,13 +1192,13 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
size_t count) size_t count)
{ {
BUG_ON(direction != READ); BUG_ON(direction != READ);
WARN_ON(pipe->nrbufs == pipe->buffers); WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
i->type = ITER_PIPE | READ; i->type = ITER_PIPE | READ;
i->pipe = pipe; i->pipe = pipe;
i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); i->head = pipe->head;
i->iov_offset = 0; i->iov_offset = 0;
i->count = count; i->count = count;
i->start_idx = i->idx; i->start_head = i->head;
} }
EXPORT_SYMBOL(iov_iter_pipe); EXPORT_SYMBOL(iov_iter_pipe);
...@@ -1189,11 +1222,12 @@ EXPORT_SYMBOL(iov_iter_discard); ...@@ -1189,11 +1222,12 @@ EXPORT_SYMBOL(iov_iter_discard);
unsigned long iov_iter_alignment(const struct iov_iter *i) unsigned long iov_iter_alignment(const struct iov_iter *i)
{ {
unsigned int p_mask = i->pipe->ring_size - 1;
unsigned long res = 0; unsigned long res = 0;
size_t size = i->count; size_t size = i->count;
if (unlikely(iov_iter_is_pipe(i))) { if (unlikely(iov_iter_is_pipe(i))) {
if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx])) if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
return size | i->iov_offset; return size | i->iov_offset;
return size; return size;
} }
...@@ -1231,19 +1265,20 @@ EXPORT_SYMBOL(iov_iter_gap_alignment); ...@@ -1231,19 +1265,20 @@ EXPORT_SYMBOL(iov_iter_gap_alignment);
static inline ssize_t __pipe_get_pages(struct iov_iter *i, static inline ssize_t __pipe_get_pages(struct iov_iter *i,
size_t maxsize, size_t maxsize,
struct page **pages, struct page **pages,
int idx, int iter_head,
size_t *start) size_t *start)
{ {
struct pipe_inode_info *pipe = i->pipe; struct pipe_inode_info *pipe = i->pipe;
ssize_t n = push_pipe(i, maxsize, &idx, start); unsigned int p_mask = pipe->ring_size - 1;
ssize_t n = push_pipe(i, maxsize, &iter_head, start);
if (!n) if (!n)
return -EFAULT; return -EFAULT;
maxsize = n; maxsize = n;
n += *start; n += *start;
while (n > 0) { while (n > 0) {
get_page(*pages++ = pipe->bufs[idx].page); get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
idx = next_idx(idx, pipe); iter_head++;
n -= PAGE_SIZE; n -= PAGE_SIZE;
} }
...@@ -1254,9 +1289,8 @@ static ssize_t pipe_get_pages(struct iov_iter *i, ...@@ -1254,9 +1289,8 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
struct page **pages, size_t maxsize, unsigned maxpages, struct page **pages, size_t maxsize, unsigned maxpages,
size_t *start) size_t *start)
{ {
unsigned npages; unsigned int iter_head, npages;
size_t capacity; size_t capacity;
int idx;
if (!maxsize) if (!maxsize)
return 0; return 0;
...@@ -1264,12 +1298,12 @@ static ssize_t pipe_get_pages(struct iov_iter *i, ...@@ -1264,12 +1298,12 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
if (!sanity(i)) if (!sanity(i))
return -EFAULT; return -EFAULT;
data_start(i, &idx, start); data_start(i, &iter_head, start);
/* some of this one + all after this one */ /* Amount of free space: some of this one + all after this one */
npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
capacity = min(npages,maxpages) * PAGE_SIZE - *start; capacity = min(npages, maxpages) * PAGE_SIZE - *start;
return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start); return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
} }
ssize_t iov_iter_get_pages(struct iov_iter *i, ssize_t iov_iter_get_pages(struct iov_iter *i,
...@@ -1323,9 +1357,8 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, ...@@ -1323,9 +1357,8 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
size_t *start) size_t *start)
{ {
struct page **p; struct page **p;
unsigned int iter_head, npages;
ssize_t n; ssize_t n;
int idx;
int npages;
if (!maxsize) if (!maxsize)
return 0; return 0;
...@@ -1333,9 +1366,9 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, ...@@ -1333,9 +1366,9 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
if (!sanity(i)) if (!sanity(i))
return -EFAULT; return -EFAULT;
data_start(i, &idx, start); data_start(i, &iter_head, start);
/* some of this one + all after this one */ /* Amount of free space: some of this one + all after this one */
npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
n = npages * PAGE_SIZE - *start; n = npages * PAGE_SIZE - *start;
if (maxsize > n) if (maxsize > n)
maxsize = n; maxsize = n;
...@@ -1344,7 +1377,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, ...@@ -1344,7 +1377,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
p = get_pages_array(npages); p = get_pages_array(npages);
if (!p) if (!p)
return -ENOMEM; return -ENOMEM;
n = __pipe_get_pages(i, maxsize, p, idx, start); n = __pipe_get_pages(i, maxsize, p, iter_head, start);
if (n > 0) if (n > 0)
*pages = p; *pages = p;
else else
...@@ -1560,15 +1593,15 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) ...@@ -1560,15 +1593,15 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
if (unlikely(iov_iter_is_pipe(i))) { if (unlikely(iov_iter_is_pipe(i))) {
struct pipe_inode_info *pipe = i->pipe; struct pipe_inode_info *pipe = i->pipe;
unsigned int iter_head;
size_t off; size_t off;
int idx;
if (!sanity(i)) if (!sanity(i))
return 0; return 0;
data_start(i, &idx, &off); data_start(i, &iter_head, &off);
/* some of this one + all after this one */ /* some of this one + all after this one */
npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1; npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
if (npages >= maxpages) if (npages >= maxpages)
return maxpages; return maxpages;
} else iterate_all_kinds(i, size, v, ({ } else iterate_all_kinds(i, size, v, ({
......
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
#include <linux/icmpv6.h> #include <linux/icmpv6.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/pipe_fs_i.h>
#include <net/cipso_ipv4.h> #include <net/cipso_ipv4.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/ipv6.h> #include <net/ipv6.h>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment