Commit f9dd2134 authored by Dan Williams

Merge branch 'md-raid6-accel' into ioat3.2

Conflicts:
	include/linux/dmaengine.h
parents 4b652f0d 07a3b417
@@ -54,20 +54,23 @@ features surfaced as a result:
 3.1 General format of the API:
 struct dma_async_tx_descriptor *
-async_<operation>(<op specific parameters>,
-		  enum async_tx_flags flags,
-		  struct dma_async_tx_descriptor *dependency,
-		  dma_async_tx_callback callback_routine,
-		  void *callback_parameter);
+async_<operation>(<op specific parameters>, struct async_submit_ctl *submit)
 3.2 Supported operations:
 memcpy  - memory copy between a source and a destination buffer
 memset  - fill a destination buffer with a byte value
 xor     - xor a series of source buffers and write the result to a
	   destination buffer
-xor_zero_sum - xor a series of source buffers and set a flag if the
+xor_val - xor a series of source buffers and set a flag if the
	   result is zero.  The implementation attempts to prevent
	   writes to memory
+pq      - generate the p+q (raid6 syndrome) from a series of source buffers
+pq_val  - validate that a p and/or q buffer is in sync with a given series of
+	   sources
+datap   - (raid6_datap_recov) recover a raid6 data block and the p block
+	   from the given sources
+2data   - (raid6_2data_recov) recover 2 raid6 data blocks from the given
+	   sources
 3.3 Descriptor management:
 The return value is non-NULL and points to a 'descriptor' when the operation
@@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to
 recycle (or free) the descriptor.  A descriptor can be acked by one of the
 following methods:
 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
-2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent
-   descriptor of a new operation.
+2/ submitting an unacknowledged descriptor as a dependency to another
+   async_tx call will implicitly set the acknowledged state.
 3/ calling async_tx_ack() on the descriptor.

 3.4 When does the operation execute?
@@ -119,12 +122,14 @@ of an operation.
 Perform a xor->copy->xor operation where each operation depends on the
 result from the previous operation:

-void complete_xor_copy_xor(void *param)
+void callback(void *param)
 {
-	printk("complete\n");
+	struct completion *cmp = param;
+
+	complete(cmp);
 }

-int run_xor_copy_xor(struct page **xor_srcs,
+void run_xor_copy_xor(struct page **xor_srcs,
		      int xor_src_cnt,
		      struct page *xor_dest,
		      size_t xor_len,
@@ -133,16 +138,26 @@ int run_xor_copy_xor(struct page **xor_srcs,
		      size_t copy_len)
 {
	struct dma_async_tx_descriptor *tx;
+	addr_conv_t addr_conv[xor_src_cnt];
+	struct async_submit_ctl submit;
+	struct completion cmp;

-	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-		       ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
-	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
-			  ASYNC_TX_DEP_ACK, tx, NULL, NULL);
-	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-		       ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
-		       tx, complete_xor_copy_xor, NULL);
+	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
+			  addr_conv);
+	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
+
+	submit.depend_tx = tx;
+	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
+
+	init_completion(&cmp);
+	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
+			  callback, &cmp, addr_conv);
+	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);

	async_tx_issue_pending_all();
+
+	wait_for_completion(&cmp);
 }

 See include/linux/async_tx.h for more information on the flags.  See the
......
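The raid6 helpers introduced by this merge follow the same submission pattern. A minimal, illustrative sketch (not taken from the patch) of generating a syndrome; NDISKS is an assumed constant, callback() is the completion routine from the example above, P sits at blocks[NDISKS-2] and Q at blocks[NDISKS-1], and error handling is omitted:

void run_gen_syndrome(struct page **blocks, size_t len)
{
	/* blocks[0..NDISKS-3] = data, blocks[NDISKS-2] = P, blocks[NDISKS-1] = Q */
	struct dma_async_tx_descriptor *tx;
	addr_conv_t addr_conv[NDISKS];
	struct async_submit_ctl submit;
	struct completion cmp;

	init_completion(&cmp);
	init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp,
			  addr_conv);
	tx = async_gen_syndrome(blocks, 0, NDISKS, len, &submit);

	async_tx_issue_pending_all();
	wait_for_completion(&cmp);
}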
@@ -756,13 +756,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
	hw_desc->src[0] = val;
 }

-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
 {
	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
	struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;

	iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
-	return desc_ctrl.zero_result_err;
+	return desc_ctrl.zero_result_err << SUM_CHECK_P;
 }

 static inline void iop_chan_append(struct iop_adma_chan *chan)
......
@@ -428,18 +428,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
	hw_desc->block_fill_data = val;
 }

-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
 {
	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
	struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
	struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
+	enum sum_check_flags flags;

	BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
-	if (desc_ctrl.pq_xfer_en)
-		return byte_count.zero_result_err_q;
-	else
-		return byte_count.zero_result_err;
+
+	flags = byte_count.zero_result_err_q << SUM_CHECK_Q;
+	flags |= byte_count.zero_result_err << SUM_CHECK_P;
+
+	return flags;
 }

 static inline void iop_chan_append(struct iop_adma_chan *chan)
......
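For reference, the enum sum_check_flags value produced by these helpers (and filled in through the result pointer of the *_val operations) is meant to be tested bit-wise by the caller. A hedged sketch, assuming the SUM_CHECK_P_RESULT/SUM_CHECK_Q_RESULT masks that accompany SUM_CHECK_P/SUM_CHECK_Q in dmaengine.h:

	enum sum_check_flags pqres = 0;

	/* ... after an async_syndrome_val(blocks, 0, disks, len, &pqres,
	 * spare, &submit) chain has completed ...
	 */
	if (pqres & SUM_CHECK_P_RESULT)
		pr_debug("P does not match the data blocks\n");
	if (pqres & SUM_CHECK_Q_RESULT)
		pr_debug("Q does not match the data blocks\n");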
@@ -478,7 +478,7 @@ void __init iop13xx_platform_init(void)
		dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
		dma_cap_set(DMA_XOR, plat_data->cap_mask);
		dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-		dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+		dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
		dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
		dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
		dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
@@ -490,7 +490,7 @@ void __init iop13xx_platform_init(void)
		dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
		dma_cap_set(DMA_XOR, plat_data->cap_mask);
		dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-		dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+		dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
		dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
		dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
		dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
@@ -502,13 +502,13 @@ void __init iop13xx_platform_init(void)
		dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
		dma_cap_set(DMA_XOR, plat_data->cap_mask);
		dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-		dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+		dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
		dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
		dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
		dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
-		dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
+		dma_cap_set(DMA_PQ, plat_data->cap_mask);
		dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
-		dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask);
+		dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
		break;
	}
 }
......
@@ -198,7 +198,7 @@ static int __init iop3xx_adma_cap_init(void)
	dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
 #else
	dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
-	dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask);
+	dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask);
	dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
	dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
 #endif
......
@@ -14,3 +14,12 @@ config ASYNC_MEMSET
	tristate
	select ASYNC_CORE

+config ASYNC_PQ
+	tristate
+	select ASYNC_CORE
+
+config ASYNC_RAID6_RECOV
+	tristate
+	select ASYNC_CORE
+	select ASYNC_PQ
@@ -2,3 +2,6 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o
 obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
 obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
 obj-$(CONFIG_ASYNC_XOR) += async_xor.o
+obj-$(CONFIG_ASYNC_PQ) += async_pq.o
+obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o
+obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o
@@ -33,28 +33,28 @@
  * async_memcpy - attempt to copy memory with a dma engine.
  * @dest: destination page
  * @src: src page
- * @offset: offset in pages to start transaction
+ * @dest_offset: offset into 'dest' to start transaction
+ * @src_offset: offset into 'src' to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
- * @depend_tx: memcpy depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
  */
 struct dma_async_tx_descriptor *
 async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
-	     unsigned int src_offset, size_t len, enum async_tx_flags flags,
-	     struct dma_async_tx_descriptor *depend_tx,
-	     dma_async_tx_callback cb_fn, void *cb_param)
+	     unsigned int src_offset, size_t len,
+	     struct async_submit_ctl *submit)
 {
-	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY,
+	struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
						      &dest, 1, &src, 1, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	if (device) {
		dma_addr_t dma_dest, dma_src;
-		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+		unsigned long dma_prep_flags;

+		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
					DMA_FROM_DEVICE);
@@ -67,13 +67,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
	if (tx) {
		pr_debug("%s: (async) len: %zu\n", __func__, len);

-		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+		async_tx_submit(chan, tx, submit);
	} else {
		void *dest_buf, *src_buf;

		pr_debug("%s: (sync) len: %zu\n", __func__, len);

		/* wait for any prerequisite operations */
-		async_tx_quiesce(&depend_tx);
+		async_tx_quiesce(&submit->depend_tx);

		dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
		src_buf = kmap_atomic(src, KM_USER1) + src_offset;
@@ -83,26 +83,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
		kunmap_atomic(dest_buf, KM_USER0);
		kunmap_atomic(src_buf, KM_USER1);

-		async_tx_sync_epilog(cb_fn, cb_param);
+		async_tx_sync_epilog(submit);
	}

	return tx;
 }
 EXPORT_SYMBOL_GPL(async_memcpy);

-static int __init async_memcpy_init(void)
-{
-	return 0;
-}
-
-static void __exit async_memcpy_exit(void)
-{
-	do { } while (0);
-}
-
-module_init(async_memcpy_init);
-module_exit(async_memcpy_exit);
-
 MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("asynchronous memcpy api");
 MODULE_LICENSE("GPL");
@@ -35,26 +35,23 @@
  * @val: fill value
  * @offset: offset in pages to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: memset depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ *
+ * honored flags: ASYNC_TX_ACK
  */
 struct dma_async_tx_descriptor *
-async_memset(struct page *dest, int val, unsigned int offset,
-	     size_t len, enum async_tx_flags flags,
-	     struct dma_async_tx_descriptor *depend_tx,
-	     dma_async_tx_callback cb_fn, void *cb_param)
+async_memset(struct page *dest, int val, unsigned int offset, size_t len,
+	     struct async_submit_ctl *submit)
 {
-	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET,
+	struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET,
						      &dest, 1, NULL, 0, len);
	struct dma_device *device = chan ? chan->device : NULL;
	struct dma_async_tx_descriptor *tx = NULL;

	if (device) {
		dma_addr_t dma_dest;
-		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+		unsigned long dma_prep_flags;

+		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
		dma_dest = dma_map_page(device->dev, dest, offset, len,
					DMA_FROM_DEVICE);
@@ -64,38 +61,25 @@ async_memset(struct page *dest, int val, unsigned int offset,
	if (tx) {
		pr_debug("%s: (async) len: %zu\n", __func__, len);

-		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+		async_tx_submit(chan, tx, submit);
	} else { /* run the memset synchronously */
		void *dest_buf;

		pr_debug("%s: (sync) len: %zu\n", __func__, len);

-		dest_buf = (void *) (((char *) page_address(dest)) + offset);
+		dest_buf = page_address(dest) + offset;

		/* wait for any prerequisite operations */
-		async_tx_quiesce(&depend_tx);
+		async_tx_quiesce(&submit->depend_tx);

		memset(dest_buf, val, len);

-		async_tx_sync_epilog(cb_fn, cb_param);
+		async_tx_sync_epilog(submit);
	}

	return tx;
 }
 EXPORT_SYMBOL_GPL(async_memset);

-static int __init async_memset_init(void)
-{
-	return 0;
-}
-
-static void __exit async_memset_exit(void)
-{
-	do { } while (0);
-}
-
-module_init(async_memset_init);
-module_exit(async_memset_exit);
-
 MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("asynchronous memset api");
 MODULE_LICENSE("GPL");
/*
* Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com>
* Copyright(c) 2009 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called COPYING.
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
/**
* scribble - space to hold throwaway P buffer for synchronous gen_syndrome
*/
static struct page *scribble;
static bool is_raid6_zero_block(struct page *p)
{
return p == (void *) raid6_empty_zero_page;
}
/* the struct page *blocks[] parameter passed to async_gen_syndrome()
* and async_syndrome_val() contains the 'P' destination address at
* blocks[disks-2] and the 'Q' destination address at blocks[disks-1]
*
* note: these are macros as they are used as lvalues
*/
#define P(b, d) (b[d-2])
#define Q(b, d) (b[d-1])
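/*
 * Example layout (illustrative, not part of the original source): for a
 * six-device array the caller builds
 *
 *	blocks[0..3] = data pages D0..D3
 *	blocks[4]    = P destination, i.e. P(blocks, 6)
 *	blocks[5]    = Q destination, i.e. Q(blocks, 6)
 *
 * and then calls async_gen_syndrome(blocks, 0, 6, len, &submit).
 */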
/**
* do_async_gen_syndrome - asynchronously calculate P and/or Q
*/
static __async_inline struct dma_async_tx_descriptor *
do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
const unsigned char *scfs, unsigned int offset, int disks,
size_t len, dma_addr_t *dma_src,
struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct dma_device *dma = chan->device;
enum dma_ctrl_flags dma_flags = 0;
enum async_tx_flags flags_orig = submit->flags;
dma_async_tx_callback cb_fn_orig = submit->cb_fn;
	void *cb_param_orig = submit->cb_param;
int src_cnt = disks - 2;
unsigned char coefs[src_cnt];
unsigned short pq_src_cnt;
dma_addr_t dma_dest[2];
int src_off = 0;
int idx;
int i;
/* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
if (P(blocks, disks))
dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
len, DMA_BIDIRECTIONAL);
else
dma_flags |= DMA_PREP_PQ_DISABLE_P;
if (Q(blocks, disks))
dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
len, DMA_BIDIRECTIONAL);
else
dma_flags |= DMA_PREP_PQ_DISABLE_Q;
/* convert source addresses being careful to collapse 'empty'
* sources and update the coefficients accordingly
*/
for (i = 0, idx = 0; i < src_cnt; i++) {
if (is_raid6_zero_block(blocks[i]))
continue;
dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
DMA_TO_DEVICE);
coefs[idx] = scfs[i];
idx++;
}
src_cnt = idx;
while (src_cnt > 0) {
submit->flags = flags_orig;
pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags));
/* if we are submitting additional pqs, leave the chain open,
* clear the callback parameters, and leave the destination
* buffers mapped
*/
if (src_cnt > pq_src_cnt) {
submit->flags &= ~ASYNC_TX_ACK;
dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
submit->cb_fn = NULL;
submit->cb_param = NULL;
} else {
dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
submit->cb_fn = cb_fn_orig;
submit->cb_param = cb_param_orig;
if (cb_fn_orig)
dma_flags |= DMA_PREP_INTERRUPT;
}
/* Since we have clobbered the src_list we are committed
* to doing this asynchronously. Drivers force forward
* progress in case they can not provide a descriptor
*/
for (;;) {
tx = dma->device_prep_dma_pq(chan, dma_dest,
&dma_src[src_off],
pq_src_cnt,
&coefs[src_off], len,
dma_flags);
if (likely(tx))
break;
async_tx_quiesce(&submit->depend_tx);
dma_async_issue_pending(chan);
}
async_tx_submit(chan, tx, submit);
submit->depend_tx = tx;
/* drop completed sources */
src_cnt -= pq_src_cnt;
src_off += pq_src_cnt;
dma_flags |= DMA_PREP_CONTINUE;
}
return tx;
}
/**
* do_sync_gen_syndrome - synchronously calculate a raid6 syndrome
*/
static void
do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
size_t len, struct async_submit_ctl *submit)
{
void **srcs;
int i;
if (submit->scribble)
srcs = submit->scribble;
else
srcs = (void **) blocks;
for (i = 0; i < disks; i++) {
if (is_raid6_zero_block(blocks[i])) {
BUG_ON(i > disks - 3); /* P or Q can't be zero */
srcs[i] = blocks[i];
} else
srcs[i] = page_address(blocks[i]) + offset;
}
raid6_call.gen_syndrome(disks, len, srcs);
async_tx_sync_epilog(submit);
}
/**
* async_gen_syndrome - asynchronously calculate a raid6 syndrome
* @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
* @offset: common offset into each block (src and dest) to start transaction
* @disks: number of blocks (including missing P or Q, see below)
* @len: length of operation in bytes
* @submit: submission/completion modifiers
*
* General note: This routine assumes a field of GF(2^8) with a
* primitive polynomial of 0x11d and a generator of {02}.
*
* 'disks' note: callers can optionally omit either P or Q (but not
* both) from the calculation by setting blocks[disks-2] or
* blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <=
* PAGE_SIZE as a temporary buffer of this size is used in the
* synchronous path. 'disks' always accounts for both destination
* buffers.
*
* 'blocks' note: if submit->scribble is NULL then the contents of
 * 'blocks' may be overwritten
*/
struct dma_async_tx_descriptor *
async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
size_t len, struct async_submit_ctl *submit)
{
int src_cnt = disks - 2;
struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
&P(blocks, disks), 2,
blocks, src_cnt, len);
struct dma_device *device = chan ? chan->device : NULL;
dma_addr_t *dma_src = NULL;
BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
if (submit->scribble)
dma_src = submit->scribble;
else if (sizeof(dma_addr_t) <= sizeof(struct page *))
dma_src = (dma_addr_t *) blocks;
if (dma_src && device &&
(src_cnt <= dma_maxpq(device, 0) ||
dma_maxpq(device, DMA_PREP_CONTINUE) > 0)) {
/* run the p+q asynchronously */
pr_debug("%s: (async) disks: %d len: %zu\n",
__func__, disks, len);
return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
disks, len, dma_src, submit);
}
/* run the pq synchronously */
pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
/* wait for any prerequisite operations */
async_tx_quiesce(&submit->depend_tx);
if (!P(blocks, disks)) {
P(blocks, disks) = scribble;
BUG_ON(len + offset > PAGE_SIZE);
}
if (!Q(blocks, disks)) {
Q(blocks, disks) = scribble;
BUG_ON(len + offset > PAGE_SIZE);
}
do_sync_gen_syndrome(blocks, offset, disks, len, submit);
return NULL;
}
EXPORT_SYMBOL_GPL(async_gen_syndrome);
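/*
 * Illustrative only (not part of this file): a typical caller wraps the
 * call in an async_submit_ctl whose scribble region has room for 'disks'
 * address conversions, for example:
 */
static struct dma_async_tx_descriptor *
example_gen_syndrome(struct page **blocks, int disks, size_t len,
		     addr_conv_t *scribble)
{
	struct async_submit_ctl submit;

	init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, scribble);
	return async_gen_syndrome(blocks, 0, disks, len, &submit);
}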
/**
* async_syndrome_val - asynchronously validate a raid6 syndrome
* @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
* @offset: common offset into each block (src and dest) to start transaction
* @disks: number of blocks (including missing P or Q, see below)
* @len: length of operation in bytes
* @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
* @spare: temporary result buffer for the synchronous case
* @submit: submission / completion modifiers
*
* The same notes from async_gen_syndrome apply to the 'blocks',
* and 'disks' parameters of this routine. The synchronous path
* requires a temporary result buffer and submit->scribble to be
* specified.
*/
struct dma_async_tx_descriptor *
async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
size_t len, enum sum_check_flags *pqres, struct page *spare,
struct async_submit_ctl *submit)
{
struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL,
NULL, 0, blocks, disks,
len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
dma_addr_t *dma_src = NULL;
BUG_ON(disks < 4);
if (submit->scribble)
dma_src = submit->scribble;
else if (sizeof(dma_addr_t) <= sizeof(struct page *))
dma_src = (dma_addr_t *) blocks;
if (dma_src && device && disks <= dma_maxpq(device, 0)) {
struct device *dev = device->dev;
dma_addr_t *pq = &dma_src[disks-2];
int i;
pr_debug("%s: (async) disks: %d len: %zu\n",
__func__, disks, len);
if (!P(blocks, disks))
dma_flags |= DMA_PREP_PQ_DISABLE_P;
if (!Q(blocks, disks))
dma_flags |= DMA_PREP_PQ_DISABLE_Q;
for (i = 0; i < disks; i++)
if (likely(blocks[i])) {
BUG_ON(is_raid6_zero_block(blocks[i]));
dma_src[i] = dma_map_page(dev, blocks[i],
offset, len,
DMA_TO_DEVICE);
}
for (;;) {
tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
disks - 2,
raid6_gfexp,
len, pqres,
dma_flags);
if (likely(tx))
break;
async_tx_quiesce(&submit->depend_tx);
dma_async_issue_pending(chan);
}
async_tx_submit(chan, tx, submit);
return tx;
} else {
struct page *p_src = P(blocks, disks);
struct page *q_src = Q(blocks, disks);
enum async_tx_flags flags_orig = submit->flags;
dma_async_tx_callback cb_fn_orig = submit->cb_fn;
void *scribble = submit->scribble;
void *cb_param_orig = submit->cb_param;
void *p, *q, *s;
pr_debug("%s: (sync) disks: %d len: %zu\n",
__func__, disks, len);
/* caller must provide a temporary result buffer and
* allow the input parameters to be preserved
*/
BUG_ON(!spare || !scribble);
/* wait for any prerequisite operations */
async_tx_quiesce(&submit->depend_tx);
/* recompute p and/or q into the temporary buffer and then
* check to see the result matches the current value
*/
tx = NULL;
*pqres = 0;
if (p_src) {
init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
NULL, NULL, scribble);
tx = async_xor(spare, blocks, offset, disks-2, len, submit);
async_tx_quiesce(&tx);
p = page_address(p_src) + offset;
s = page_address(spare) + offset;
*pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
}
if (q_src) {
P(blocks, disks) = NULL;
Q(blocks, disks) = spare;
init_async_submit(submit, 0, NULL, NULL, NULL, scribble);
tx = async_gen_syndrome(blocks, offset, disks, len, submit);
async_tx_quiesce(&tx);
q = page_address(q_src) + offset;
s = page_address(spare) + offset;
*pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
}
/* restore P, Q and submit */
P(blocks, disks) = p_src;
Q(blocks, disks) = q_src;
submit->cb_fn = cb_fn_orig;
submit->cb_param = cb_param_orig;
submit->flags = flags_orig;
async_tx_sync_epilog(submit);
return NULL;
}
}
EXPORT_SYMBOL_GPL(async_syndrome_val);
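/*
 * Illustrative only (not part of this file): checking a stripe's P and Q.
 * 'spare' is a throwaway page used by the synchronous path and 'scribble'
 * must hold 'disks' address conversions.
 */
static enum sum_check_flags
example_syndrome_check(struct page **blocks, int disks, size_t len,
		       struct page *spare, addr_conv_t *scribble)
{
	struct async_submit_ctl submit;
	enum sum_check_flags pqres = 0;
	struct dma_async_tx_descriptor *tx;

	init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, scribble);
	tx = async_syndrome_val(blocks, 0, disks, len, &pqres, spare, &submit);
	async_tx_quiesce(&tx);	/* wait for the check to finish */

	if (pqres)
		pr_debug("%s: parity mismatch (%x)\n", __func__, pqres);

	return pqres;
}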
static int __init async_pq_init(void)
{
scribble = alloc_page(GFP_KERNEL);
if (scribble)
return 0;
pr_err("%s: failed to allocate required spare page\n", __func__);
return -ENOMEM;
}
static void __exit async_pq_exit(void)
{
put_page(scribble);
}
module_init(async_pq_init);
module_exit(async_pq_exit);
MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation");
MODULE_LICENSE("GPL");
/*
* Asynchronous RAID-6 recovery calculations ASYNC_TX API.
* Copyright(c) 2009 Intel Corporation
*
* based on raid6recov.c:
* Copyright 2002 H. Peter Anvin
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
static struct dma_async_tx_descriptor *
async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
size_t len, struct async_submit_ctl *submit)
{
struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
&dest, 1, srcs, 2, len);
struct dma_device *dma = chan ? chan->device : NULL;
const u8 *amul, *bmul;
u8 ax, bx;
u8 *a, *b, *c;
if (dma) {
dma_addr_t dma_dest[2];
dma_addr_t dma_src[2];
struct device *dev = dma->dev;
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
len, dma_flags);
if (tx) {
async_tx_submit(chan, tx, submit);
return tx;
}
}
/* run the operation synchronously */
async_tx_quiesce(&submit->depend_tx);
amul = raid6_gfmul[coef[0]];
bmul = raid6_gfmul[coef[1]];
a = page_address(srcs[0]);
b = page_address(srcs[1]);
c = page_address(dest);
while (len--) {
ax = amul[*a++];
bx = bmul[*b++];
*c++ = ax ^ bx;
}
return NULL;
}
static struct dma_async_tx_descriptor *
async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
struct async_submit_ctl *submit)
{
struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
&dest, 1, &src, 1, len);
struct dma_device *dma = chan ? chan->device : NULL;
const u8 *qmul; /* Q multiplier table */
u8 *d, *s;
if (dma) {
dma_addr_t dma_dest[2];
dma_addr_t dma_src[1];
struct device *dev = dma->dev;
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
len, dma_flags);
if (tx) {
async_tx_submit(chan, tx, submit);
return tx;
}
}
/* no channel available, or failed to allocate a descriptor, so
* perform the operation synchronously
*/
async_tx_quiesce(&submit->depend_tx);
qmul = raid6_gfmul[coef];
d = page_address(dest);
s = page_address(src);
while (len--)
*d++ = qmul[*s++];
return NULL;
}
static struct dma_async_tx_descriptor *
__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *a, *b;
struct page *srcs[2];
unsigned char coef[2];
enum async_tx_flags flags = submit->flags;
dma_async_tx_callback cb_fn = submit->cb_fn;
void *cb_param = submit->cb_param;
void *scribble = submit->scribble;
p = blocks[4-2];
q = blocks[4-1];
a = blocks[faila];
b = blocks[failb];
/* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
srcs[0] = p;
srcs[1] = q;
coef[0] = raid6_gfexi[failb-faila];
coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
init_async_submit(submit, 0, tx, NULL, NULL, scribble);
tx = async_sum_product(b, srcs, coef, bytes, submit);
/* Dy = P+Pxy+Dx */
srcs[0] = p;
srcs[1] = b;
init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
cb_param, scribble);
tx = async_xor(a, srcs, 0, 2, bytes, submit);
return tx;
}
static struct dma_async_tx_descriptor *
__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *g, *dp, *dq;
struct page *srcs[2];
unsigned char coef[2];
enum async_tx_flags flags = submit->flags;
dma_async_tx_callback cb_fn = submit->cb_fn;
void *cb_param = submit->cb_param;
void *scribble = submit->scribble;
int uninitialized_var(good);
int i;
for (i = 0; i < 3; i++) {
if (i == faila || i == failb)
continue;
else {
good = i;
break;
}
}
BUG_ON(i >= 3);
p = blocks[5-2];
q = blocks[5-1];
g = blocks[good];
/* Compute syndrome with zero for the missing data pages
* Use the dead data pages as temporary storage for delta p and
* delta q
*/
dp = blocks[faila];
dq = blocks[failb];
init_async_submit(submit, 0, tx, NULL, NULL, scribble);
tx = async_memcpy(dp, g, 0, 0, bytes, submit);
init_async_submit(submit, 0, tx, NULL, NULL, scribble);
tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
/* compute P + Pxy */
srcs[0] = dp;
srcs[1] = p;
init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
scribble);
tx = async_xor(dp, srcs, 0, 2, bytes, submit);
/* compute Q + Qxy */
srcs[0] = dq;
srcs[1] = q;
init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
scribble);
tx = async_xor(dq, srcs, 0, 2, bytes, submit);
/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
srcs[0] = dp;
srcs[1] = dq;
coef[0] = raid6_gfexi[failb-faila];
coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
init_async_submit(submit, 0, tx, NULL, NULL, scribble);
tx = async_sum_product(dq, srcs, coef, bytes, submit);
/* Dy = P+Pxy+Dx */
srcs[0] = dp;
srcs[1] = dq;
init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
cb_param, scribble);
tx = async_xor(dp, srcs, 0, 2, bytes, submit);
return tx;
}
static struct dma_async_tx_descriptor *
__2data_recov_n(int disks, size_t bytes, int faila, int failb,
struct page **blocks, struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *dp, *dq;
struct page *srcs[2];
unsigned char coef[2];
enum async_tx_flags flags = submit->flags;
dma_async_tx_callback cb_fn = submit->cb_fn;
void *cb_param = submit->cb_param;
void *scribble = submit->scribble;
p = blocks[disks-2];
q = blocks[disks-1];
/* Compute syndrome with zero for the missing data pages
* Use the dead data pages as temporary storage for
* delta p and delta q
*/
dp = blocks[faila];
blocks[faila] = (void *)raid6_empty_zero_page;
blocks[disks-2] = dp;
dq = blocks[failb];
blocks[failb] = (void *)raid6_empty_zero_page;
blocks[disks-1] = dq;
init_async_submit(submit, 0, tx, NULL, NULL, scribble);
tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
/* Restore pointer table */
blocks[faila] = dp;
blocks[failb] = dq;
blocks[disks-2] = p;
blocks[disks-1] = q;
/* compute P + Pxy */
srcs[0] = dp;
srcs[1] = p;
init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
scribble);
tx = async_xor(dp, srcs, 0, 2, bytes, submit);
/* compute Q + Qxy */
srcs[0] = dq;
srcs[1] = q;
init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
scribble);
tx = async_xor(dq, srcs, 0, 2, bytes, submit);
/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
srcs[0] = dp;
srcs[1] = dq;
coef[0] = raid6_gfexi[failb-faila];
coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
init_async_submit(submit, 0, tx, NULL, NULL, scribble);
tx = async_sum_product(dq, srcs, coef, bytes, submit);
/* Dy = P+Pxy+Dx */
srcs[0] = dp;
srcs[1] = dq;
init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
cb_param, scribble);
tx = async_xor(dp, srcs, 0, 2, bytes, submit);
return tx;
}
/**
* async_raid6_2data_recov - asynchronously calculate two missing data blocks
* @disks: number of disks in the RAID-6 array
* @bytes: block size
* @faila: first failed drive index
* @failb: second failed drive index
* @blocks: array of source pointers where the last two entries are p and q
* @submit: submission/completion modifiers
*/
struct dma_async_tx_descriptor *
async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
struct page **blocks, struct async_submit_ctl *submit)
{
BUG_ON(faila == failb);
if (failb < faila)
swap(faila, failb);
pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
/* we need to preserve the contents of 'blocks' for the async
* case, so punt to synchronous if a scribble buffer is not available
*/
if (!submit->scribble) {
void **ptrs = (void **) blocks;
int i;
async_tx_quiesce(&submit->depend_tx);
for (i = 0; i < disks; i++)
ptrs[i] = page_address(blocks[i]);
raid6_2data_recov(disks, bytes, faila, failb, ptrs);
async_tx_sync_epilog(submit);
return NULL;
}
switch (disks) {
case 4:
/* dma devices do not uniformly understand a zero source pq
* operation (in contrast to the synchronous case), so
* explicitly handle the 4 disk special case
*/
return __2data_recov_4(bytes, faila, failb, blocks, submit);
case 5:
/* dma devices do not uniformly understand a single
* source pq operation (in contrast to the synchronous
* case), so explicitly handle the 5 disk special case
*/
return __2data_recov_5(bytes, faila, failb, blocks, submit);
default:
return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
}
}
EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
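/*
 * Illustrative only (not part of this file): kicking off recovery of two
 * failed data blocks; 'blocks' holds every data page plus P and Q, with
 * blocks[faila]/blocks[failb] pointing at the pages to be rebuilt in place.
 */
static struct dma_async_tx_descriptor *
example_2data_recov(struct page **blocks, int disks, size_t bytes,
		    int faila, int failb, addr_conv_t *scribble)
{
	struct async_submit_ctl submit;

	init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, scribble);
	return async_raid6_2data_recov(disks, bytes, faila, failb, blocks,
				       &submit);
}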
/**
* async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
* @disks: number of disks in the RAID-6 array
* @bytes: block size
* @faila: failed drive index
* @blocks: array of source pointers where the last two entries are p and q
* @submit: submission/completion modifiers
*/
struct dma_async_tx_descriptor *
async_raid6_datap_recov(int disks, size_t bytes, int faila,
struct page **blocks, struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *dq;
u8 coef;
enum async_tx_flags flags = submit->flags;
dma_async_tx_callback cb_fn = submit->cb_fn;
void *cb_param = submit->cb_param;
void *scribble = submit->scribble;
struct page *srcs[2];
pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
/* we need to preserve the contents of 'blocks' for the async
* case, so punt to synchronous if a scribble buffer is not available
*/
if (!scribble) {
void **ptrs = (void **) blocks;
int i;
async_tx_quiesce(&submit->depend_tx);
for (i = 0; i < disks; i++)
ptrs[i] = page_address(blocks[i]);
raid6_datap_recov(disks, bytes, faila, ptrs);
async_tx_sync_epilog(submit);
return NULL;
}
p = blocks[disks-2];
q = blocks[disks-1];
/* Compute syndrome with zero for the missing data page
* Use the dead data page as temporary storage for delta q
*/
dq = blocks[faila];
blocks[faila] = (void *)raid6_empty_zero_page;
blocks[disks-1] = dq;
/* in the 4 disk case we only need to perform a single source
* multiplication
*/
if (disks == 4) {
int good = faila == 0 ? 1 : 0;
struct page *g = blocks[good];
init_async_submit(submit, 0, tx, NULL, NULL, scribble);
tx = async_memcpy(p, g, 0, 0, bytes, submit);
init_async_submit(submit, 0, tx, NULL, NULL, scribble);
tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
} else {
init_async_submit(submit, 0, tx, NULL, NULL, scribble);
tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
}
/* Restore pointer table */
blocks[faila] = dq;
blocks[disks-1] = q;
/* calculate g^{-faila} */
coef = raid6_gfinv[raid6_gfexp[faila]];
srcs[0] = dq;
srcs[1] = q;
init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
scribble);
tx = async_xor(dq, srcs, 0, 2, bytes, submit);
init_async_submit(submit, 0, tx, NULL, NULL, scribble);
tx = async_mult(dq, dq, coef, bytes, submit);
srcs[0] = p;
srcs[1] = dq;
init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
cb_param, scribble);
tx = async_xor(p, srcs, 0, 2, bytes, submit);
return tx;
}
EXPORT_SYMBOL_GPL(async_raid6_datap_recov);
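/*
 * Illustrative only (not part of this file): the single-failure variant is
 * driven the same way.
 */
static struct dma_async_tx_descriptor *
example_datap_recov(struct page **blocks, int disks, size_t bytes, int faila,
		    addr_conv_t *scribble)
{
	struct async_submit_ctl submit;

	init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, scribble);
	return async_raid6_datap_recov(disks, bytes, faila, blocks, &submit);
}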
MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
MODULE_LICENSE("GPL");
@@ -42,16 +42,21 @@ static void __exit async_tx_exit(void)
	async_dmaengine_put();
 }

+module_init(async_tx_init);
+module_exit(async_tx_exit);
+
 /**
  * __async_tx_find_channel - find a channel to carry out the operation or let
  *	the transaction execute synchronously
- * @depend_tx: transaction dependency
+ * @submit: transaction dependency and submission modifiers
  * @tx_type: transaction type
  */
 struct dma_chan *
-__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
+__async_tx_find_channel(struct async_submit_ctl *submit,
			enum dma_transaction_type tx_type)
 {
+	struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
+
	/* see if we can keep the chain on one channel */
	if (depend_tx &&
	    dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
@@ -59,17 +64,6 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
	return async_dma_find_channel(tx_type);
 }
 EXPORT_SYMBOL_GPL(__async_tx_find_channel);
-#else
-static int __init async_tx_init(void)
-{
-	printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
-	return 0;
-}
-
-static void __exit async_tx_exit(void)
-{
-	do { } while (0);
-}
 #endif
@@ -83,8 +77,8 @@ static void
 async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
			struct dma_async_tx_descriptor *tx)
 {
-	struct dma_chan *chan;
-	struct dma_device *device;
+	struct dma_chan *chan = depend_tx->chan;
+	struct dma_device *device = chan->device;
	struct dma_async_tx_descriptor *intr_tx = (void *) ~0;

	/* first check to see if we can still append to depend_tx */
@@ -96,11 +90,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
	}
	spin_unlock_bh(&depend_tx->lock);

-	if (!intr_tx)
+	/* attached dependency, flush the parent channel */
+	if (!intr_tx) {
+		device->device_issue_pending(chan);
		return;
-
-	chan = depend_tx->chan;
-	device = chan->device;
+	}

	/* see if we can schedule an interrupt
	 * otherwise poll for completion
@@ -134,6 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
			intr_tx->tx_submit(intr_tx);
			async_tx_ack(intr_tx);
		}
+		device->device_issue_pending(chan);
	} else {
		if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
			panic("%s: DMA_ERROR waiting for depend_tx\n",
@@ -144,13 +139,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,

 /**
- * submit_disposition - while holding depend_tx->lock we must avoid submitting
- *	new operations to prevent a circular locking dependency with
- *	drivers that already hold a channel lock when calling
- *	async_tx_run_dependencies.
+ * submit_disposition - flags for routing an incoming operation
  * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
  * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
  * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
+ *
+ * while holding depend_tx->lock we must avoid submitting new operations
+ * to prevent a circular locking dependency with drivers that already
+ * hold a channel lock when calling async_tx_run_dependencies.
  */
 enum submit_disposition {
	ASYNC_TX_SUBMITTED,
@@ -160,11 +156,12 @@ enum submit_disposition {

 void
 async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
-		enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
-		dma_async_tx_callback cb_fn, void *cb_param)
+		struct async_submit_ctl *submit)
 {
-	tx->callback = cb_fn;
-	tx->callback_param = cb_param;
+	struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
+
+	tx->callback = submit->cb_fn;
+	tx->callback_param = submit->cb_param;

	if (depend_tx) {
		enum submit_disposition s;
@@ -220,30 +217,29 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
		tx->tx_submit(tx);
	}

-	if (flags & ASYNC_TX_ACK)
+	if (submit->flags & ASYNC_TX_ACK)
		async_tx_ack(tx);

-	if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
+	if (depend_tx)
		async_tx_ack(depend_tx);
 }
 EXPORT_SYMBOL_GPL(async_tx_submit);
 /**
- * async_trigger_callback - schedules the callback function to be run after
- *	any dependent operations have been completed.
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: 'callback' requires the completion of this transaction
- * @cb_fn: function to call after depend_tx completes
- * @cb_param: parameter to pass to the callback routine
+ * async_trigger_callback - schedules the callback function to be run
+ * @submit: submission and completion parameters
+ *
+ * honored flags: ASYNC_TX_ACK
+ *
+ * The callback is run after any dependent operations have completed.
  */
 struct dma_async_tx_descriptor *
-async_trigger_callback(enum async_tx_flags flags,
-		       struct dma_async_tx_descriptor *depend_tx,
-		       dma_async_tx_callback cb_fn, void *cb_param)
+async_trigger_callback(struct async_submit_ctl *submit)
 {
	struct dma_chan *chan;
	struct dma_device *device;
	struct dma_async_tx_descriptor *tx;
+	struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;

	if (depend_tx) {
		chan = depend_tx->chan;
@@ -262,14 +258,14 @@ async_trigger_callback(enum async_tx_flags flags,
	if (tx) {
		pr_debug("%s: (async)\n", __func__);

-		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+		async_tx_submit(chan, tx, submit);
	} else {
		pr_debug("%s: (sync)\n", __func__);

		/* wait for any prerequisite operations */
-		async_tx_quiesce(&depend_tx);
+		async_tx_quiesce(&submit->depend_tx);

-		async_tx_sync_epilog(cb_fn, cb_param);
+		async_tx_sync_epilog(submit);
	}

	return tx;
@@ -295,9 +291,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
 }
 EXPORT_SYMBOL_GPL(async_tx_quiesce);

-module_init(async_tx_init);
-module_exit(async_tx_exit);
-
 MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
 MODULE_LICENSE("GPL");
@@ -33,19 +33,16 @@
 /* do_async_xor - dma map the pages and perform the xor with an engine */
 static __async_inline struct dma_async_tx_descriptor *
 do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
-	     unsigned int offset, int src_cnt, size_t len,
-	     enum async_tx_flags flags,
-	     struct dma_async_tx_descriptor *depend_tx,
-	     dma_async_tx_callback cb_fn, void *cb_param)
+	     unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
+	     struct async_submit_ctl *submit)
 {
	struct dma_device *dma = chan->device;
-	dma_addr_t *dma_src = (dma_addr_t *) src_list;
	struct dma_async_tx_descriptor *tx = NULL;
	int src_off = 0;
	int i;
-	dma_async_tx_callback _cb_fn;
-	void *_cb_param;
-	enum async_tx_flags async_flags;
+	dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+	void *cb_param_orig = submit->cb_param;
+	enum async_tx_flags flags_orig = submit->flags;
	enum dma_ctrl_flags dma_flags;
	int xor_src_cnt;
	dma_addr_t dma_dest;
@@ -63,23 +60,23 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
	}

	while (src_cnt) {
-		async_flags = flags;
+		submit->flags = flags_orig;
		dma_flags = 0;
-		xor_src_cnt = min(src_cnt, dma->max_xor);
+		xor_src_cnt = min(src_cnt, (int)dma->max_xor);
		/* if we are submitting additional xors, leave the chain open,
		 * clear the callback parameters, and leave the destination
		 * buffer mapped
		 */
		if (src_cnt > xor_src_cnt) {
-			async_flags &= ~ASYNC_TX_ACK;
+			submit->flags &= ~ASYNC_TX_ACK;
			dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
-			_cb_fn = NULL;
-			_cb_param = NULL;
+			submit->cb_fn = NULL;
+			submit->cb_param = NULL;
		} else {
-			_cb_fn = cb_fn;
-			_cb_param = cb_param;
+			submit->cb_fn = cb_fn_orig;
+			submit->cb_param = cb_param_orig;
		}
-		if (_cb_fn)
+		if (submit->cb_fn)
			dma_flags |= DMA_PREP_INTERRUPT;

		/* Since we have clobbered the src_list we are committed
@@ -90,7 +87,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
					      xor_src_cnt, len, dma_flags);

		if (unlikely(!tx))
-			async_tx_quiesce(&depend_tx);
+			async_tx_quiesce(&submit->depend_tx);

		/* spin wait for the preceding transactions to complete */
		while (unlikely(!tx)) {
@@ -101,11 +98,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
						      dma_flags);
		}

-		async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn,
-				_cb_param);
-
-		depend_tx = tx;
-		flags |= ASYNC_TX_DEP_ACK;
+		async_tx_submit(chan, tx, submit);
+		submit->depend_tx = tx;

		if (src_cnt > xor_src_cnt) {
			/* drop completed sources */
@@ -124,23 +118,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,

 static void
 do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
-	    int src_cnt, size_t len, enum async_tx_flags flags,
-	    dma_async_tx_callback cb_fn, void *cb_param)
+	    int src_cnt, size_t len, struct async_submit_ctl *submit)
 {
	int i;
	int xor_src_cnt;
	int src_off = 0;
	void *dest_buf;
-	void **srcs = (void **) src_list;
+	void **srcs;

-	/* reuse the 'src_list' array to convert to buffer pointers */
+	if (submit->scribble)
+		srcs = submit->scribble;
+	else
+		srcs = (void **) src_list;
+
+	/* convert to buffer pointers */
	for (i = 0; i < src_cnt; i++)
		srcs[i] = page_address(src_list[i]) + offset;

	/* set destination address */
	dest_buf = page_address(dest) + offset;

-	if (flags & ASYNC_TX_XOR_ZERO_DST)
+	if (submit->flags & ASYNC_TX_XOR_ZERO_DST)
		memset(dest_buf, 0, len);

	while (src_cnt > 0) {
@@ -153,61 +151,70 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
		src_off += xor_src_cnt;
	}

-	async_tx_sync_epilog(cb_fn, cb_param);
+	async_tx_sync_epilog(submit);
 }
 /**
  * async_xor - attempt to xor a set of blocks with a dma engine.
- *	xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
- *	flag must be set to not include dest data in the calculation.  The
- *	assumption with dma eninges is that they only use the destination
- *	buffer as a source when it is explicity specified in the source list.
  * @dest: destination page
- * @src_list: array of source pages (if the dest is also a source it must be
- *	at index zero).  The contents of this array may be overwritten.
- * @offset: offset in pages to start transaction
+ * @src_list: array of source pages
+ * @offset: common src/dst offset to start transaction
  * @src_cnt: number of source pages
  * @len: length in bytes
- * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
- *	ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: xor depends on the result of this transaction.
- * @cb_fn: function to call when the xor completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
+ *
+ * xor_blocks always uses the dest as a source so the
+ * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
+ * the calculation.  The assumption with dma engines is that they only
+ * use the destination buffer as a source when it is explicitly specified
+ * in the source list.
+ *
+ * src_list note: if the dest is also a source it must be at index zero.
+ * The contents of this array will be overwritten if a scribble region
+ * is not specified.
  */
 struct dma_async_tx_descriptor *
 async_xor(struct page *dest, struct page **src_list, unsigned int offset,
-	  int src_cnt, size_t len, enum async_tx_flags flags,
-	  struct dma_async_tx_descriptor *depend_tx,
-	  dma_async_tx_callback cb_fn, void *cb_param)
+	  int src_cnt, size_t len, struct async_submit_ctl *submit)
 {
-	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
+	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
						      &dest, 1, src_list,
						      src_cnt, len);
+	dma_addr_t *dma_src = NULL;
+
	BUG_ON(src_cnt <= 1);

-	if (chan) {
+	if (submit->scribble)
+		dma_src = submit->scribble;
+	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+		dma_src = (dma_addr_t *) src_list;
+
+	if (dma_src && chan) {
		/* run the xor asynchronously */
		pr_debug("%s (async): len: %zu\n", __func__, len);

		return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
-				    flags, depend_tx, cb_fn, cb_param);
+				    dma_src, submit);
	} else {
		/* run the xor synchronously */
		pr_debug("%s (sync): len: %zu\n", __func__, len);
+		WARN_ONCE(chan, "%s: no space for dma address conversion\n",
+			  __func__);

		/* in the sync case the dest is an implied source
		 * (assumes the dest is the first source)
		 */
-		if (flags & ASYNC_TX_XOR_DROP_DST) {
+		if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
			src_cnt--;
			src_list++;
		}

		/* wait for any prerequisite operations */
-		async_tx_quiesce(&depend_tx);
+		async_tx_quiesce(&submit->depend_tx);

-		do_sync_xor(dest, src_list, offset, src_cnt, len,
-			    flags, cb_fn, cb_param);
+		do_sync_xor(dest, src_list, offset, src_cnt, len, submit);

		return NULL;
	}
...@@ -222,104 +229,90 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) ...@@ -222,104 +229,90 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len)
} }
/** /**
* async_xor_zero_sum - attempt a xor parity check with a dma engine. * async_xor_val - attempt a xor parity check with a dma engine.
* @dest: destination page used if the xor is performed synchronously * @dest: destination page used if the xor is performed synchronously
* @src_list: array of source pages. The dest page must be listed as a source * @src_list: array of source pages
* at index zero. The contents of this array may be overwritten.
* @offset: offset in pages to start transaction * @offset: offset in pages to start transaction
* @src_cnt: number of source pages * @src_cnt: number of source pages
* @len: length in bytes * @len: length in bytes
* @result: 0 if sum == 0 else non-zero * @result: 0 if sum == 0 else non-zero
* @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK * @submit: submission / completion modifiers
* @depend_tx: xor depends on the result of this transaction. *
* @cb_fn: function to call when the xor completes * honored flags: ASYNC_TX_ACK
* @cb_param: parameter to pass to the callback routine *
* src_list note: if the dest is also a source it must be at index zero.
* The contents of this array will be overwritten if a scribble region
* is not specified.
*/ */
struct dma_async_tx_descriptor * struct dma_async_tx_descriptor *
async_xor_zero_sum(struct page *dest, struct page **src_list, async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
unsigned int offset, int src_cnt, size_t len, int src_cnt, size_t len, enum sum_check_flags *result,
u32 *result, enum async_tx_flags flags, struct async_submit_ctl *submit)
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param)
{ {
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM, struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL,
&dest, 1, src_list, &dest, 1, src_list,
src_cnt, len); src_cnt, len);
struct dma_device *device = chan ? chan->device : NULL; struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL; struct dma_async_tx_descriptor *tx = NULL;
dma_addr_t *dma_src = NULL;
BUG_ON(src_cnt <= 1); BUG_ON(src_cnt <= 1);
if (device && src_cnt <= device->max_xor) { if (submit->scribble)
dma_addr_t *dma_src = (dma_addr_t *) src_list; dma_src = submit->scribble;
unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; else if (sizeof(dma_addr_t) <= sizeof(struct page *))
dma_src = (dma_addr_t *) src_list;
if (dma_src && device && src_cnt <= device->max_xor) {
unsigned long dma_prep_flags;
int i; int i;
pr_debug("%s: (async) len: %zu\n", __func__, len); pr_debug("%s: (async) len: %zu\n", __func__, len);
dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
for (i = 0; i < src_cnt; i++) for (i = 0; i < src_cnt; i++)
dma_src[i] = dma_map_page(device->dev, src_list[i], dma_src[i] = dma_map_page(device->dev, src_list[i],
offset, len, DMA_TO_DEVICE); offset, len, DMA_TO_DEVICE);
tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
len, result, len, result,
dma_prep_flags); dma_prep_flags);
if (unlikely(!tx)) { if (unlikely(!tx)) {
async_tx_quiesce(&depend_tx); async_tx_quiesce(&submit->depend_tx);
while (!tx) { while (!tx) {
dma_async_issue_pending(chan); dma_async_issue_pending(chan);
tx = device->device_prep_dma_zero_sum(chan, tx = device->device_prep_dma_xor_val(chan,
dma_src, src_cnt, len, result, dma_src, src_cnt, len, result,
dma_prep_flags); dma_prep_flags);
} }
} }
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); async_tx_submit(chan, tx, submit);
} else { } else {
unsigned long xor_flags = flags; enum async_tx_flags flags_orig = submit->flags;
pr_debug("%s: (sync) len: %zu\n", __func__, len); pr_debug("%s: (sync) len: %zu\n", __func__, len);
WARN_ONCE(device && src_cnt <= device->max_xor,
"%s: no space for dma address conversion\n",
__func__);
xor_flags |= ASYNC_TX_XOR_DROP_DST; submit->flags |= ASYNC_TX_XOR_DROP_DST;
xor_flags &= ~ASYNC_TX_ACK; submit->flags &= ~ASYNC_TX_ACK;
tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, tx = async_xor(dest, src_list, offset, src_cnt, len, submit);
depend_tx, NULL, NULL);
async_tx_quiesce(&tx); async_tx_quiesce(&tx);
*result = page_is_zero(dest, offset, len) ? 0 : 1; *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P;
async_tx_sync_epilog(cb_fn, cb_param); async_tx_sync_epilog(submit);
submit->flags = flags_orig;
} }
return tx; return tx;
} }
EXPORT_SYMBOL_GPL(async_xor_zero_sum); EXPORT_SYMBOL_GPL(async_xor_val);
static int __init async_xor_init(void)
{
#ifdef CONFIG_DMA_ENGINE
/* To conserve stack space the input src_list (array of page pointers)
* is reused to hold the array of dma addresses passed to the driver.
* This conversion is only possible when dma_addr_t is less than the
* size of a pointer. HIGHMEM64G is known to violate this
* assumption.
*/
BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
#endif
return 0;
}
static void __exit async_xor_exit(void)
{
do { } while (0);
}
module_init(async_xor_init);
module_exit(async_xor_exit);
MODULE_AUTHOR("Intel Corporation"); MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
......
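A minimal, hypothetical caller of the reworked interface (example_xor(), example_callback() and the on-stack scribble region are illustrative assumptions, not part of the patch; init_async_submit(), async_xor(), async_trigger_callback() and async_tx_issue_pending() are the calls used above): zero a destination page, xor a set of source pages into it, then wait through a chained callback.

#include <linux/async_tx.h>
#include <linux/completion.h>

static void example_callback(void *param)
{
	struct completion *cmp = param;

	complete(cmp);
}

/* xor 'src_cnt' pages into 'dest' (dest is zeroed first) and wait */
static void example_xor(struct page *dest, struct page **srcs,
			int src_cnt, size_t len)
{
	struct dma_async_tx_descriptor *tx;
	struct async_submit_ctl submit;
	addr_conv_t addr_conv[src_cnt];
	struct completion cmp;

	init_completion(&cmp);

	/* 1/ zero the destination and xor the sources into it */
	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, NULL, NULL,
			  addr_conv);
	tx = async_xor(dest, srcs, 0, src_cnt, len, &submit);

	/* 2/ chain a callback that fires once the xor completes */
	init_async_submit(&submit, ASYNC_TX_ACK, tx, example_callback, &cmp,
			  addr_conv);
	tx = async_trigger_callback(&submit);

	async_tx_issue_pending(tx);
	wait_for_completion(&cmp);
}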
/*
* asynchronous raid6 recovery self test
* Copyright (c) 2009, Intel Corporation.
*
* based on drivers/md/raid6test/test.c:
* Copyright 2002-2007 H. Peter Anvin
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/async_tx.h>
#include <linux/random.h>
#undef pr
#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
#define NDISKS 16 /* Including P and Q */
static struct page *dataptrs[NDISKS];
static struct page *data[NDISKS+3];
static struct page *spare;
static struct page *recovi;
static struct page *recovj;
static void callback(void *param)
{
struct completion *cmp = param;
complete(cmp);
}
static void makedata(int disks)
{
int i, j;
for (i = 0; i < disks; i++) {
for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) {
u32 *p = page_address(data[i]) + j;
*p = random32();
}
dataptrs[i] = data[i];
}
}
static char disk_type(int d, int disks)
{
if (d == disks - 2)
return 'P';
else if (d == disks - 1)
return 'Q';
else
return 'D';
}
/* Recover two failed blocks. */
static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs)
{
struct async_submit_ctl submit;
addr_conv_t addr_conv[disks];
struct completion cmp;
struct dma_async_tx_descriptor *tx = NULL;
enum sum_check_flags result = ~0;
if (faila > failb)
swap(faila, failb);
if (failb == disks-1) {
if (faila == disks-2) {
/* P+Q failure. Just rebuild the syndrome. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
} else {
struct page *blocks[disks];
struct page *dest;
int count = 0;
int i;
/* data+Q failure. Reconstruct data from P,
* then rebuild syndrome
*/
for (i = disks; i-- ; ) {
if (i == faila || i == failb)
continue;
blocks[count++] = ptrs[i];
}
dest = ptrs[faila];
init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
NULL, NULL, addr_conv);
tx = async_xor(dest, blocks, 0, count, bytes, &submit);
init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv);
tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
}
} else {
if (failb == disks-2) {
/* data+P failure. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
} else {
/* data+data failure. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
}
}
init_completion(&cmp);
init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv);
tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit);
async_tx_issue_pending(tx);
if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0)
pr("%s: timeout! (faila: %d failb: %d disks: %d)\n",
__func__, faila, failb, disks);
if (result != 0)
pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n",
__func__, faila, failb, result);
}
static int test_disks(int i, int j, int disks)
{
int erra, errb;
memset(page_address(recovi), 0xf0, PAGE_SIZE);
memset(page_address(recovj), 0xba, PAGE_SIZE);
dataptrs[i] = recovi;
dataptrs[j] = recovj;
raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs);
erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE);
errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE);
pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n",
__func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks),
(!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB");
dataptrs[i] = data[i];
dataptrs[j] = data[j];
return erra || errb;
}
static int test(int disks, int *tests)
{
addr_conv_t addr_conv[disks];
struct dma_async_tx_descriptor *tx;
struct async_submit_ctl submit;
struct completion cmp;
int err = 0;
int i, j;
recovi = data[disks];
recovj = data[disks+1];
spare = data[disks+2];
makedata(disks);
/* Nuke syndromes */
memset(page_address(data[disks-2]), 0xee, PAGE_SIZE);
memset(page_address(data[disks-1]), 0xee, PAGE_SIZE);
/* Generate assumed good syndrome */
init_completion(&cmp);
init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv);
tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit);
async_tx_issue_pending(tx);
if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) {
pr("error: initial gen_syndrome(%d) timed out\n", disks);
return 1;
}
pr("testing the %d-disk case...\n", disks);
for (i = 0; i < disks-1; i++)
for (j = i+1; j < disks; j++) {
(*tests)++;
err += test_disks(i, j, disks);
}
return err;
}
static int raid6_test(void)
{
int err = 0;
int tests = 0;
int i;
for (i = 0; i < NDISKS+3; i++) {
data[i] = alloc_page(GFP_KERNEL);
if (!data[i]) {
while (i--)
put_page(data[i]);
return -ENOMEM;
}
}
/* the 4-disk and 5-disk cases are special for the recovery code */
if (NDISKS > 4)
err += test(4, &tests);
if (NDISKS > 5)
err += test(5, &tests);
err += test(NDISKS, &tests);
pr("\n");
pr("complete (%d tests, %d failure%s)\n",
tests, err, err == 1 ? "" : "s");
for (i = 0; i < NDISKS+3; i++)
put_page(data[i]);
return 0;
}
static void raid6_test_exit(void)
{
}
/* when compiled-in, wait for drivers to load first (assumes dma drivers
* are also compiled-in)
*/
late_initcall(raid6_test);
module_exit(raid6_test_exit);
MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests");
MODULE_LICENSE("GPL");
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
menuconfig DMADEVICES menuconfig DMADEVICES
bool "DMA Engine support" bool "DMA Engine support"
depends on !HIGHMEM64G && HAS_DMA depends on HAS_DMA
help help
DMA engines can do asynchronous data transfers without DMA engines can do asynchronous data transfers without
involving the host CPU. Currently, this framework can be involving the host CPU. Currently, this framework can be
......
...@@ -644,8 +644,12 @@ int dma_async_device_register(struct dma_device *device) ...@@ -644,8 +644,12 @@ int dma_async_device_register(struct dma_device *device)
!device->device_prep_dma_memcpy); !device->device_prep_dma_memcpy);
BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
!device->device_prep_dma_xor); !device->device_prep_dma_xor);
BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
!device->device_prep_dma_zero_sum); !device->device_prep_dma_xor_val);
BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
!device->device_prep_dma_pq);
BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
!device->device_prep_dma_pq_val);
BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
!device->device_prep_dma_memset); !device->device_prep_dma_memset);
BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
...@@ -939,49 +943,24 @@ EXPORT_SYMBOL(dma_async_tx_descriptor_init); ...@@ -939,49 +943,24 @@ EXPORT_SYMBOL(dma_async_tx_descriptor_init);
/* dma_wait_for_async_tx - spin wait for a transaction to complete /* dma_wait_for_async_tx - spin wait for a transaction to complete
* @tx: in-flight transaction to wait on * @tx: in-flight transaction to wait on
*
* This routine assumes that tx was obtained from a call to async_memcpy,
* async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
* and submitted). Walking the parent chain is only meant to cover for DMA
* drivers that do not implement the DMA_INTERRUPT capability and may race with
* the driver's descriptor cleanup routine.
*/ */
enum dma_status enum dma_status
dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
{ {
enum dma_status status; unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
struct dma_async_tx_descriptor *iter;
struct dma_async_tx_descriptor *parent;
if (!tx) if (!tx)
return DMA_SUCCESS; return DMA_SUCCESS;
WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" while (tx->cookie == -EBUSY) {
" %s\n", __func__, dma_chan_name(tx->chan)); if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
pr_err("%s timeout waiting for descriptor submission\n",
/* poll through the dependency chain, return when tx is complete */ __func__);
do { return DMA_ERROR;
iter = tx; }
/* find the root of the unsubmitted dependency chain */
do {
parent = iter->parent;
if (!parent)
break;
else
iter = parent;
} while (parent);
/* there is a small window for ->parent == NULL and
* ->cookie == -EBUSY
*/
while (iter->cookie == -EBUSY)
cpu_relax(); cpu_relax();
}
status = dma_sync_wait(iter->chan, iter->cookie); return dma_sync_wait(tx->chan, tx->cookie);
} while (status == DMA_IN_PROGRESS || (iter != tx));
return status;
} }
EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
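A hedged sketch of a caller (example_drain() is a hypothetical name; this is roughly what async_tx_quiesce() does with the return value): synchronously drain one in-flight descriptor, where DMA_ERROR now also covers the 5 second submission timeout added above.

#include <linux/async_tx.h>

static void example_drain(struct dma_async_tx_descriptor *tx)
{
	if (!tx)
		return;

	/* spin until the descriptor is submitted and completed */
	if (dma_wait_for_async_tx(tx) == DMA_ERROR)
		panic("DMA_ERROR waiting for transaction\n");

	/* let the driver recycle the descriptor */
	async_tx_ack(tx);
}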
......
...@@ -43,6 +43,11 @@ module_param(xor_sources, uint, S_IRUGO); ...@@ -43,6 +43,11 @@ module_param(xor_sources, uint, S_IRUGO);
MODULE_PARM_DESC(xor_sources, MODULE_PARM_DESC(xor_sources,
"Number of xor source buffers (default: 3)"); "Number of xor source buffers (default: 3)");
static unsigned int pq_sources = 3;
module_param(pq_sources, uint, S_IRUGO);
MODULE_PARM_DESC(pq_sources,
"Number of p+q source buffers (default: 3)");
/* /*
* Initialization patterns. All bytes in the source buffer has bit 7 * Initialization patterns. All bytes in the source buffer has bit 7
* set, all bytes in the destination buffer has bit 7 cleared. * set, all bytes in the destination buffer has bit 7 cleared.
...@@ -227,6 +232,7 @@ static int dmatest_func(void *data) ...@@ -227,6 +232,7 @@ static int dmatest_func(void *data)
dma_cookie_t cookie; dma_cookie_t cookie;
enum dma_status status; enum dma_status status;
enum dma_ctrl_flags flags; enum dma_ctrl_flags flags;
u8 pq_coefs[pq_sources];
int ret; int ret;
int src_cnt; int src_cnt;
int dst_cnt; int dst_cnt;
...@@ -243,6 +249,11 @@ static int dmatest_func(void *data) ...@@ -243,6 +249,11 @@ static int dmatest_func(void *data)
else if (thread->type == DMA_XOR) { else if (thread->type == DMA_XOR) {
src_cnt = xor_sources | 1; /* force odd to ensure dst = src */ src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
dst_cnt = 1; dst_cnt = 1;
} else if (thread->type == DMA_PQ) {
src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
dst_cnt = 2;
for (i = 0; i < pq_sources; i++)
pq_coefs[i] = 1;
} else } else
goto err_srcs; goto err_srcs;
...@@ -310,6 +321,15 @@ static int dmatest_func(void *data) ...@@ -310,6 +321,15 @@ static int dmatest_func(void *data)
dma_dsts[0] + dst_off, dma_dsts[0] + dst_off,
dma_srcs, xor_sources, dma_srcs, xor_sources,
len, flags); len, flags);
else if (thread->type == DMA_PQ) {
dma_addr_t dma_pq[dst_cnt];
for (i = 0; i < dst_cnt; i++)
dma_pq[i] = dma_dsts[i] + dst_off;
tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
pq_sources, pq_coefs,
len, flags);
}
if (!tx) { if (!tx) {
for (i = 0; i < src_cnt; i++) for (i = 0; i < src_cnt; i++)
...@@ -446,6 +466,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty ...@@ -446,6 +466,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty
op = "copy"; op = "copy";
else if (type == DMA_XOR) else if (type == DMA_XOR)
op = "xor"; op = "xor";
else if (type == DMA_PQ)
op = "pq";
else else
return -EINVAL; return -EINVAL;
...@@ -501,6 +523,10 @@ static int dmatest_add_channel(struct dma_chan *chan) ...@@ -501,6 +523,10 @@ static int dmatest_add_channel(struct dma_chan *chan)
cnt = dmatest_add_threads(dtc, DMA_XOR); cnt = dmatest_add_threads(dtc, DMA_XOR);
thread_count += cnt > 0 ?: 0; thread_count += cnt > 0 ?: 0;
} }
if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
cnt = dmatest_add_threads(dtc, DMA_PQ);
thread_count += cnt > 0 ?: 0;
}
pr_info("dmatest: Started %u threads using %s\n", pr_info("dmatest: Started %u threads using %s\n",
thread_count, dma_chan_name(chan)); thread_count, dma_chan_name(chan));
......
...@@ -660,7 +660,7 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, ...@@ -660,7 +660,7 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
} }
static struct dma_async_tx_descriptor * static struct dma_async_tx_descriptor *
iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
unsigned int src_cnt, size_t len, u32 *result, unsigned int src_cnt, size_t len, u32 *result,
unsigned long flags) unsigned long flags)
{ {
...@@ -906,7 +906,7 @@ static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device) ...@@ -906,7 +906,7 @@ static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
static int __devinit static int __devinit
iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) iop_adma_xor_val_self_test(struct iop_adma_device *device)
{ {
int i, src_idx; int i, src_idx;
struct page *dest; struct page *dest;
...@@ -1002,7 +1002,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) ...@@ -1002,7 +1002,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
PAGE_SIZE, DMA_TO_DEVICE); PAGE_SIZE, DMA_TO_DEVICE);
/* skip zero sum if the capability is not present */ /* skip zero sum if the capability is not present */
if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
goto free_resources; goto free_resources;
/* zero sum the sources with the destination page */ /* zero sum the sources with the destination page */
...@@ -1016,7 +1016,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) ...@@ -1016,7 +1016,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
dma_srcs[i] = dma_map_page(dma_chan->device->dev, dma_srcs[i] = dma_map_page(dma_chan->device->dev,
zero_sum_srcs[i], 0, PAGE_SIZE, zero_sum_srcs[i], 0, PAGE_SIZE,
DMA_TO_DEVICE); DMA_TO_DEVICE);
tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
&zero_sum_result, &zero_sum_result,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK); DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
...@@ -1072,7 +1072,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) ...@@ -1072,7 +1072,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
dma_srcs[i] = dma_map_page(dma_chan->device->dev, dma_srcs[i] = dma_map_page(dma_chan->device->dev,
zero_sum_srcs[i], 0, PAGE_SIZE, zero_sum_srcs[i], 0, PAGE_SIZE,
DMA_TO_DEVICE); DMA_TO_DEVICE);
tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
&zero_sum_result, &zero_sum_result,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK); DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
...@@ -1192,9 +1192,9 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) ...@@ -1192,9 +1192,9 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
dma_dev->max_xor = iop_adma_get_max_xor(); dma_dev->max_xor = iop_adma_get_max_xor();
dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
} }
if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
dma_dev->device_prep_dma_zero_sum = dma_dev->device_prep_dma_xor_val =
iop_adma_prep_dma_zero_sum; iop_adma_prep_dma_xor_val;
if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
dma_dev->device_prep_dma_interrupt = dma_dev->device_prep_dma_interrupt =
iop_adma_prep_dma_interrupt; iop_adma_prep_dma_interrupt;
...@@ -1249,7 +1249,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) ...@@ -1249,7 +1249,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
ret = iop_adma_xor_zero_sum_self_test(adev); ret = iop_adma_xor_val_self_test(adev);
dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
if (ret) if (ret)
goto err_free_iop_chan; goto err_free_iop_chan;
...@@ -1257,12 +1257,12 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) ...@@ -1257,12 +1257,12 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
"( %s%s%s%s%s%s%s%s%s%s)\n", "( %s%s%s%s%s%s%s%s%s%s)\n",
dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "", dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "", dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
......
...@@ -124,6 +124,8 @@ config MD_RAID456 ...@@ -124,6 +124,8 @@ config MD_RAID456
select MD_RAID6_PQ select MD_RAID6_PQ
select ASYNC_MEMCPY select ASYNC_MEMCPY
select ASYNC_XOR select ASYNC_XOR
select ASYNC_PQ
select ASYNC_RAID6_RECOV
---help--- ---help---
A RAID-5 set of N drives with a capacity of C MB per drive provides A RAID-5 set of N drives with a capacity of C MB per drive provides
the capacity of C * (N - 1) MB, and protects against a failure the capacity of C * (N - 1) MB, and protects against a failure
...@@ -152,9 +154,33 @@ config MD_RAID456 ...@@ -152,9 +154,33 @@ config MD_RAID456
If unsure, say Y. If unsure, say Y.
config MULTICORE_RAID456
bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
depends on MD_RAID456
depends on SMP
depends on EXPERIMENTAL
---help---
Enable the raid456 module to dispatch per-stripe raid operations to a
thread pool.
If unsure, say N.
config MD_RAID6_PQ config MD_RAID6_PQ
tristate tristate
config ASYNC_RAID6_TEST
tristate "Self test for hardware accelerated raid6 recovery"
depends on MD_RAID6_PQ
select ASYNC_RAID6_RECOV
---help---
This is a one-shot self test that permutes through the
recovery of all the possible two-disk failure scenarios for an
N-disk array. Recovery is performed with the asynchronous
raid6 recovery routines, and will optionally use an offload
engine if one is available.
If unsure, say N.
config MD_MULTIPATH config MD_MULTIPATH
tristate "Multipath I/O support" tristate "Multipath I/O support"
depends on BLK_DEV_MD depends on BLK_DEV_MD
......
...@@ -47,7 +47,9 @@ ...@@ -47,7 +47,9 @@
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/raid/pq.h> #include <linux/raid/pq.h>
#include <linux/async_tx.h> #include <linux/async_tx.h>
#include <linux/async.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/cpu.h>
#include "md.h" #include "md.h"
#include "raid5.h" #include "raid5.h"
#include "bitmap.h" #include "bitmap.h"
...@@ -499,11 +501,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, ...@@ -499,11 +501,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
struct page *bio_page; struct page *bio_page;
int i; int i;
int page_offset; int page_offset;
struct async_submit_ctl submit;
if (bio->bi_sector >= sector) if (bio->bi_sector >= sector)
page_offset = (signed)(bio->bi_sector - sector) * 512; page_offset = (signed)(bio->bi_sector - sector) * 512;
else else
page_offset = (signed)(sector - bio->bi_sector) * -512; page_offset = (signed)(sector - bio->bi_sector) * -512;
init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
bio_for_each_segment(bvl, bio, i) { bio_for_each_segment(bvl, bio, i) {
int len = bio_iovec_idx(bio, i)->bv_len; int len = bio_iovec_idx(bio, i)->bv_len;
int clen; int clen;
...@@ -525,15 +530,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, ...@@ -525,15 +530,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
bio_page = bio_iovec_idx(bio, i)->bv_page; bio_page = bio_iovec_idx(bio, i)->bv_page;
if (frombio) if (frombio)
tx = async_memcpy(page, bio_page, page_offset, tx = async_memcpy(page, bio_page, page_offset,
b_offset, clen, b_offset, clen, &submit);
ASYNC_TX_DEP_ACK,
tx, NULL, NULL);
else else
tx = async_memcpy(bio_page, page, b_offset, tx = async_memcpy(bio_page, page, b_offset,
page_offset, clen, page_offset, clen, &submit);
ASYNC_TX_DEP_ACK,
tx, NULL, NULL);
} }
/* chain the operations */
submit.depend_tx = tx;
if (clen < len) /* hit end of page */ if (clen < len) /* hit end of page */
break; break;
page_offset += len; page_offset += len;
...@@ -592,6 +596,7 @@ static void ops_run_biofill(struct stripe_head *sh) ...@@ -592,6 +596,7 @@ static void ops_run_biofill(struct stripe_head *sh)
{ {
struct dma_async_tx_descriptor *tx = NULL; struct dma_async_tx_descriptor *tx = NULL;
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
struct async_submit_ctl submit;
int i; int i;
pr_debug("%s: stripe %llu\n", __func__, pr_debug("%s: stripe %llu\n", __func__,
...@@ -615,22 +620,34 @@ static void ops_run_biofill(struct stripe_head *sh) ...@@ -615,22 +620,34 @@ static void ops_run_biofill(struct stripe_head *sh)
} }
atomic_inc(&sh->count); atomic_inc(&sh->count);
async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
ops_complete_biofill, sh); async_trigger_callback(&submit);
} }
static void ops_complete_compute5(void *stripe_head_ref) static void mark_target_uptodate(struct stripe_head *sh, int target)
{ {
struct stripe_head *sh = stripe_head_ref; struct r5dev *tgt;
int target = sh->ops.target;
struct r5dev *tgt = &sh->dev[target];
pr_debug("%s: stripe %llu\n", __func__, if (target < 0)
(unsigned long long)sh->sector); return;
tgt = &sh->dev[target];
set_bit(R5_UPTODATE, &tgt->flags); set_bit(R5_UPTODATE, &tgt->flags);
BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
clear_bit(R5_Wantcompute, &tgt->flags); clear_bit(R5_Wantcompute, &tgt->flags);
}
static void ops_complete_compute(void *stripe_head_ref)
{
struct stripe_head *sh = stripe_head_ref;
pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
/* mark the computed target(s) as uptodate */
mark_target_uptodate(sh, sh->ops.target);
mark_target_uptodate(sh, sh->ops.target2);
clear_bit(STRIPE_COMPUTE_RUN, &sh->state); clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
if (sh->check_state == check_state_compute_run) if (sh->check_state == check_state_compute_run)
sh->check_state = check_state_compute_result; sh->check_state = check_state_compute_result;
...@@ -638,16 +655,24 @@ static void ops_complete_compute5(void *stripe_head_ref) ...@@ -638,16 +655,24 @@ static void ops_complete_compute5(void *stripe_head_ref)
release_stripe(sh); release_stripe(sh);
} }
static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) /* return a pointer to the address conversion region of the scribble buffer */
static addr_conv_t *to_addr_conv(struct stripe_head *sh,
struct raid5_percpu *percpu)
{
return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
}
static struct dma_async_tx_descriptor *
ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
{ {
/* kernel stack size limits the total number of disks */
int disks = sh->disks; int disks = sh->disks;
struct page *xor_srcs[disks]; struct page **xor_srcs = percpu->scribble;
int target = sh->ops.target; int target = sh->ops.target;
struct r5dev *tgt = &sh->dev[target]; struct r5dev *tgt = &sh->dev[target];
struct page *xor_dest = tgt->page; struct page *xor_dest = tgt->page;
int count = 0; int count = 0;
struct dma_async_tx_descriptor *tx; struct dma_async_tx_descriptor *tx;
struct async_submit_ctl submit;
int i; int i;
pr_debug("%s: stripe %llu block: %d\n", pr_debug("%s: stripe %llu block: %d\n",
...@@ -660,17 +685,207 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) ...@@ -660,17 +685,207 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
atomic_inc(&sh->count); atomic_inc(&sh->count);
init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
ops_complete_compute, sh, to_addr_conv(sh, percpu));
if (unlikely(count == 1)) if (unlikely(count == 1))
tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
0, NULL, ops_complete_compute5, sh); else
tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
return tx;
}
/* set_syndrome_sources - populate source buffers for gen_syndrome
* @srcs - (struct page *) array of size sh->disks
* @sh - stripe_head to parse
*
* Populates srcs in proper layout order for the stripe and returns the
* 'count' of sources to be used in a call to async_gen_syndrome. The P
* destination buffer is recorded in srcs[count] and the Q destination
* is recorded in srcs[count+1]].
*/
static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
{
int disks = sh->disks;
int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
int d0_idx = raid6_d0(sh);
int count;
int i;
for (i = 0; i < disks; i++)
srcs[i] = (void *)raid6_empty_zero_page;
count = 0;
i = d0_idx;
do {
int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
srcs[slot] = sh->dev[i].page;
i = raid6_next_disk(i, disks);
} while (i != d0_idx);
BUG_ON(count != syndrome_disks);
return count;
}
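For a hypothetical non-ddf stripe with 4 data devices plus P and Q (disks == 6), set_syndrome_sources() returns count == 4 with the data pages in srcs[0..3] in syndrome order and the P and Q pages left at srcs[4] and srcs[5], so a caller regenerates both parities by passing count+2 blocks, the same pattern ops_run_reconstruct6() uses below:

	count = set_syndrome_sources(blocks, sh);	/* count == 4 in this example */
	init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
			  sh, to_addr_conv(sh, percpu));
	async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);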
static struct dma_async_tx_descriptor *
ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
{
int disks = sh->disks;
struct page **blocks = percpu->scribble;
int target;
int qd_idx = sh->qd_idx;
struct dma_async_tx_descriptor *tx;
struct async_submit_ctl submit;
struct r5dev *tgt;
struct page *dest;
int i;
int count;
if (sh->ops.target < 0)
target = sh->ops.target2;
else if (sh->ops.target2 < 0)
target = sh->ops.target;
else else
tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, /* we should only have one valid target */
ASYNC_TX_XOR_ZERO_DST, NULL, BUG();
ops_complete_compute5, sh); BUG_ON(target < 0);
pr_debug("%s: stripe %llu block: %d\n",
__func__, (unsigned long long)sh->sector, target);
tgt = &sh->dev[target];
BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
dest = tgt->page;
atomic_inc(&sh->count);
if (target == qd_idx) {
count = set_syndrome_sources(blocks, sh);
blocks[count] = NULL; /* regenerating p is not necessary */
BUG_ON(blocks[count+1] != dest); /* q should already be set */
init_async_submit(&submit, 0, NULL, ops_complete_compute, sh,
to_addr_conv(sh, percpu));
tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
} else {
/* Compute any data- or p-drive using XOR */
count = 0;
for (i = disks; i-- ; ) {
if (i == target || i == qd_idx)
continue;
blocks[count++] = sh->dev[i].page;
}
init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
ops_complete_compute, sh,
to_addr_conv(sh, percpu));
tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
}
return tx; return tx;
} }
static struct dma_async_tx_descriptor *
ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
{
int i, count, disks = sh->disks;
int syndrome_disks = sh->ddf_layout ? disks : disks-2;
int d0_idx = raid6_d0(sh);
int faila = -1, failb = -1;
int target = sh->ops.target;
int target2 = sh->ops.target2;
struct r5dev *tgt = &sh->dev[target];
struct r5dev *tgt2 = &sh->dev[target2];
struct dma_async_tx_descriptor *tx;
struct page **blocks = percpu->scribble;
struct async_submit_ctl submit;
pr_debug("%s: stripe %llu block1: %d block2: %d\n",
__func__, (unsigned long long)sh->sector, target, target2);
BUG_ON(target < 0 || target2 < 0);
BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
/* we need to open-code set_syndrome_sources to handle the
* slot number conversion for 'faila' and 'failb'
*/
for (i = 0; i < disks ; i++)
blocks[i] = (void *)raid6_empty_zero_page;
count = 0;
i = d0_idx;
do {
int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
blocks[slot] = sh->dev[i].page;
if (i == target)
faila = slot;
if (i == target2)
failb = slot;
i = raid6_next_disk(i, disks);
} while (i != d0_idx);
BUG_ON(count != syndrome_disks);
BUG_ON(faila == failb);
if (failb < faila)
swap(faila, failb);
pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
__func__, (unsigned long long)sh->sector, faila, failb);
atomic_inc(&sh->count);
if (failb == syndrome_disks+1) {
/* Q disk is one of the missing disks */
if (faila == syndrome_disks) {
/* Missing P+Q, just recompute */
init_async_submit(&submit, 0, NULL, ops_complete_compute,
sh, to_addr_conv(sh, percpu));
return async_gen_syndrome(blocks, 0, count+2,
STRIPE_SIZE, &submit);
} else {
struct page *dest;
int data_target;
int qd_idx = sh->qd_idx;
/* Missing D+Q: recompute D from P, then recompute Q */
if (target == qd_idx)
data_target = target2;
else
data_target = target;
count = 0;
for (i = disks; i-- ; ) {
if (i == data_target || i == qd_idx)
continue;
blocks[count++] = sh->dev[i].page;
}
dest = sh->dev[data_target].page;
init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
NULL, NULL, to_addr_conv(sh, percpu));
tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
&submit);
count = set_syndrome_sources(blocks, sh);
init_async_submit(&submit, 0, tx, ops_complete_compute,
sh, to_addr_conv(sh, percpu));
return async_gen_syndrome(blocks, 0, count+2,
STRIPE_SIZE, &submit);
}
}
init_async_submit(&submit, 0, NULL, ops_complete_compute, sh,
to_addr_conv(sh, percpu));
if (failb == syndrome_disks) {
/* We're missing D+P. */
return async_raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE,
faila, blocks, &submit);
} else {
/* We're missing D+D. */
return async_raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE,
faila, failb, blocks, &submit);
}
}
static void ops_complete_prexor(void *stripe_head_ref) static void ops_complete_prexor(void *stripe_head_ref)
{ {
struct stripe_head *sh = stripe_head_ref; struct stripe_head *sh = stripe_head_ref;
...@@ -680,12 +895,13 @@ static void ops_complete_prexor(void *stripe_head_ref) ...@@ -680,12 +895,13 @@ static void ops_complete_prexor(void *stripe_head_ref)
} }
static struct dma_async_tx_descriptor * static struct dma_async_tx_descriptor *
ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
struct dma_async_tx_descriptor *tx)
{ {
/* kernel stack size limits the total number of disks */
int disks = sh->disks; int disks = sh->disks;
struct page *xor_srcs[disks]; struct page **xor_srcs = percpu->scribble;
int count = 0, pd_idx = sh->pd_idx, i; int count = 0, pd_idx = sh->pd_idx, i;
struct async_submit_ctl submit;
/* existing parity data subtracted */ /* existing parity data subtracted */
struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
...@@ -700,9 +916,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) ...@@ -700,9 +916,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
xor_srcs[count++] = dev->page; xor_srcs[count++] = dev->page;
} }
tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx,
ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, ops_complete_prexor, sh, to_addr_conv(sh, percpu));
ops_complete_prexor, sh); tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
return tx; return tx;
} }
...@@ -742,17 +958,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) ...@@ -742,17 +958,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
return tx; return tx;
} }
static void ops_complete_postxor(void *stripe_head_ref) static void ops_complete_reconstruct(void *stripe_head_ref)
{ {
struct stripe_head *sh = stripe_head_ref; struct stripe_head *sh = stripe_head_ref;
int disks = sh->disks, i, pd_idx = sh->pd_idx; int disks = sh->disks;
int pd_idx = sh->pd_idx;
int qd_idx = sh->qd_idx;
int i;
pr_debug("%s: stripe %llu\n", __func__, pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector); (unsigned long long)sh->sector);
for (i = disks; i--; ) { for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
if (dev->written || i == pd_idx)
if (dev->written || i == pd_idx || i == qd_idx)
set_bit(R5_UPTODATE, &dev->flags); set_bit(R5_UPTODATE, &dev->flags);
} }
...@@ -770,12 +990,12 @@ static void ops_complete_postxor(void *stripe_head_ref) ...@@ -770,12 +990,12 @@ static void ops_complete_postxor(void *stripe_head_ref)
} }
static void static void
ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
struct dma_async_tx_descriptor *tx)
{ {
/* kernel stack size limits the total number of disks */
int disks = sh->disks; int disks = sh->disks;
struct page *xor_srcs[disks]; struct page **xor_srcs = percpu->scribble;
struct async_submit_ctl submit;
int count = 0, pd_idx = sh->pd_idx, i; int count = 0, pd_idx = sh->pd_idx, i;
struct page *xor_dest; struct page *xor_dest;
int prexor = 0; int prexor = 0;
...@@ -809,18 +1029,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) ...@@ -809,18 +1029,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
* set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
* for the synchronous xor case * for the synchronous xor case
*/ */
flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | flags = ASYNC_TX_ACK |
(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
atomic_inc(&sh->count); atomic_inc(&sh->count);
if (unlikely(count == 1)) { init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); to_addr_conv(sh, percpu));
tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, if (unlikely(count == 1))
flags, tx, ops_complete_postxor, sh); tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
} else else
tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
flags, tx, ops_complete_postxor, sh); }
static void
ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
struct dma_async_tx_descriptor *tx)
{
struct async_submit_ctl submit;
struct page **blocks = percpu->scribble;
int count;
pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
count = set_syndrome_sources(blocks, sh);
atomic_inc(&sh->count);
init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
sh, to_addr_conv(sh, percpu));
async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
} }
static void ops_complete_check(void *stripe_head_ref) static void ops_complete_check(void *stripe_head_ref)
...@@ -835,63 +1073,115 @@ static void ops_complete_check(void *stripe_head_ref) ...@@ -835,63 +1073,115 @@ static void ops_complete_check(void *stripe_head_ref)
release_stripe(sh); release_stripe(sh);
} }
static void ops_run_check(struct stripe_head *sh) static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
{ {
/* kernel stack size limits the total number of disks */
int disks = sh->disks; int disks = sh->disks;
struct page *xor_srcs[disks]; int pd_idx = sh->pd_idx;
int qd_idx = sh->qd_idx;
struct page *xor_dest;
struct page **xor_srcs = percpu->scribble;
struct dma_async_tx_descriptor *tx; struct dma_async_tx_descriptor *tx;
struct async_submit_ctl submit;
int count = 0, pd_idx = sh->pd_idx, i; int count;
struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; int i;
pr_debug("%s: stripe %llu\n", __func__, pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector); (unsigned long long)sh->sector);
count = 0;
xor_dest = sh->dev[pd_idx].page;
xor_srcs[count++] = xor_dest;
for (i = disks; i--; ) { for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i]; if (i == pd_idx || i == qd_idx)
if (i != pd_idx) continue;
xor_srcs[count++] = dev->page; xor_srcs[count++] = sh->dev[i].page;
} }
tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, init_async_submit(&submit, 0, NULL, NULL, NULL,
&sh->ops.zero_sum_result, 0, NULL, NULL, NULL); to_addr_conv(sh, percpu));
tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
&sh->ops.zero_sum_result, &submit);
atomic_inc(&sh->count); atomic_inc(&sh->count);
tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
ops_complete_check, sh); tx = async_trigger_callback(&submit);
} }
static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
{
struct page **srcs = percpu->scribble;
struct async_submit_ctl submit;
int count;
pr_debug("%s: stripe %llu checkp: %d\n", __func__,
(unsigned long long)sh->sector, checkp);
count = set_syndrome_sources(srcs, sh);
if (!checkp)
srcs[count] = NULL;
atomic_inc(&sh->count);
init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
sh, to_addr_conv(sh, percpu));
async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
&sh->ops.zero_sum_result, percpu->spare_page, &submit);
}
static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
{ {
int overlap_clear = 0, i, disks = sh->disks; int overlap_clear = 0, i, disks = sh->disks;
struct dma_async_tx_descriptor *tx = NULL; struct dma_async_tx_descriptor *tx = NULL;
raid5_conf_t *conf = sh->raid_conf;
int level = conf->level;
struct raid5_percpu *percpu;
unsigned long cpu;
cpu = get_cpu();
percpu = per_cpu_ptr(conf->percpu, cpu);
if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
ops_run_biofill(sh); ops_run_biofill(sh);
overlap_clear++; overlap_clear++;
} }
if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
tx = ops_run_compute5(sh); if (level < 6)
/* terminate the chain if postxor is not set to be run */ tx = ops_run_compute5(sh, percpu);
if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request)) else {
if (sh->ops.target2 < 0 || sh->ops.target < 0)
tx = ops_run_compute6_1(sh, percpu);
else
tx = ops_run_compute6_2(sh, percpu);
}
/* terminate the chain if reconstruct is not set to be run */
if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
async_tx_ack(tx); async_tx_ack(tx);
} }
if (test_bit(STRIPE_OP_PREXOR, &ops_request)) if (test_bit(STRIPE_OP_PREXOR, &ops_request))
tx = ops_run_prexor(sh, tx); tx = ops_run_prexor(sh, percpu, tx);
if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
tx = ops_run_biodrain(sh, tx); tx = ops_run_biodrain(sh, tx);
overlap_clear++; overlap_clear++;
} }
if (test_bit(STRIPE_OP_POSTXOR, &ops_request)) if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
ops_run_postxor(sh, tx); if (level < 6)
ops_run_reconstruct5(sh, percpu, tx);
else
ops_run_reconstruct6(sh, percpu, tx);
}
if (test_bit(STRIPE_OP_CHECK, &ops_request)) if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
ops_run_check(sh); if (sh->check_state == check_state_run)
ops_run_check_p(sh, percpu);
else if (sh->check_state == check_state_run_q)
ops_run_check_pq(sh, percpu, 0);
else if (sh->check_state == check_state_run_pq)
ops_run_check_pq(sh, percpu, 1);
else
BUG();
}
if (overlap_clear) if (overlap_clear)
for (i = disks; i--; ) { for (i = disks; i--; ) {
...@@ -899,6 +1189,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) ...@@ -899,6 +1189,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
if (test_and_clear_bit(R5_Overlap, &dev->flags)) if (test_and_clear_bit(R5_Overlap, &dev->flags))
wake_up(&sh->raid_conf->wait_for_overlap); wake_up(&sh->raid_conf->wait_for_overlap);
} }
put_cpu();
} }
static int grow_one_stripe(raid5_conf_t *conf) static int grow_one_stripe(raid5_conf_t *conf)
...@@ -948,6 +1239,28 @@ static int grow_stripes(raid5_conf_t *conf, int num) ...@@ -948,6 +1239,28 @@ static int grow_stripes(raid5_conf_t *conf, int num)
return 0; return 0;
} }
/**
* scribble_len - return the required size of the scribble region
* @num - total number of disks in the array
*
* The size must be enough to contain:
* 1/ a struct page pointer for each device in the array +2
* 2/ room to convert each entry in (1) to its corresponding dma
* (dma_map_page()) or page (page_address()) address.
*
* Note: the +2 is for the destination buffers of the ddf/raid6 case where we
* calculate over all devices (not just the data blocks), using zeros in place
* of the P and Q blocks.
*/
static size_t scribble_len(int num)
{
size_t len;
len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
return len;
}
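A worked example (the byte counts assume 64-bit struct page pointers and an 8-byte addr_conv_t; both sizes are configuration dependent): a 10-device array (8 data + P + Q) needs

	/* num == 10: (10+2) page pointers + (10+2) addr_conv_t entries
	 *            = 12*8 + 12*8 = 192 bytes of scribble space per cpu;
	 * to_addr_conv() returns a pointer to the second (addr_conv_t) half.
	 */
	size_t len = scribble_len(10);	/* 192 under these assumptions */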
static int resize_stripes(raid5_conf_t *conf, int newsize) static int resize_stripes(raid5_conf_t *conf, int newsize)
{ {
/* Make all the stripes able to hold 'newsize' devices. /* Make all the stripes able to hold 'newsize' devices.
...@@ -976,6 +1289,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) ...@@ -976,6 +1289,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
struct stripe_head *osh, *nsh; struct stripe_head *osh, *nsh;
LIST_HEAD(newstripes); LIST_HEAD(newstripes);
struct disk_info *ndisks; struct disk_info *ndisks;
unsigned long cpu;
int err; int err;
struct kmem_cache *sc; struct kmem_cache *sc;
int i; int i;
...@@ -1041,7 +1355,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) ...@@ -1041,7 +1355,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
/* Step 3. /* Step 3.
* At this point, we are holding all the stripes so the array * At this point, we are holding all the stripes so the array
* is completely stalled, so now is a good time to resize * is completely stalled, so now is a good time to resize
* conf->disks. * conf->disks and the scribble region
*/ */
ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
if (ndisks) { if (ndisks) {
...@@ -1052,10 +1366,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) ...@@ -1052,10 +1366,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
} else } else
err = -ENOMEM; err = -ENOMEM;
get_online_cpus();
conf->scribble_len = scribble_len(newsize);
for_each_present_cpu(cpu) {
struct raid5_percpu *percpu;
void *scribble;
percpu = per_cpu_ptr(conf->percpu, cpu);
scribble = kmalloc(conf->scribble_len, GFP_NOIO);
if (scribble) {
kfree(percpu->scribble);
percpu->scribble = scribble;
} else {
err = -ENOMEM;
break;
}
}
put_online_cpus();
/* Step 4, return new stripes to service */ /* Step 4, return new stripes to service */
while(!list_empty(&newstripes)) { while(!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru); nsh = list_entry(newstripes.next, struct stripe_head, lru);
list_del_init(&nsh->lru); list_del_init(&nsh->lru);
for (i=conf->raid_disks; i < newsize; i++) for (i=conf->raid_disks; i < newsize; i++)
if (nsh->dev[i].page == NULL) { if (nsh->dev[i].page == NULL) {
struct page *p = alloc_page(GFP_NOIO); struct page *p = alloc_page(GFP_NOIO);
...@@ -1594,258 +1928,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) ...@@ -1594,258 +1928,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
} }
/*
* Copy data between a page in the stripe cache, and one or more bion
* The page could align with the middle of the bio, or there could be
* several bion, each with several bio_vecs, which cover part of the page
* Multiple bion are linked together on bi_next. There may be extras
* at the end of this list. We ignore them.
*/
static void copy_data(int frombio, struct bio *bio,
struct page *page,
sector_t sector)
{
char *pa = page_address(page);
struct bio_vec *bvl;
int i;
int page_offset;
if (bio->bi_sector >= sector)
page_offset = (signed)(bio->bi_sector - sector) * 512;
else
page_offset = (signed)(sector - bio->bi_sector) * -512;
bio_for_each_segment(bvl, bio, i) {
int len = bio_iovec_idx(bio,i)->bv_len;
int clen;
int b_offset = 0;
if (page_offset < 0) {
b_offset = -page_offset;
page_offset += b_offset;
len -= b_offset;
}
if (len > 0 && page_offset + len > STRIPE_SIZE)
clen = STRIPE_SIZE - page_offset;
else clen = len;
if (clen > 0) {
char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
if (frombio)
memcpy(pa+page_offset, ba+b_offset, clen);
else
memcpy(ba+b_offset, pa+page_offset, clen);
__bio_kunmap_atomic(ba, KM_USER0);
}
if (clen < len) /* hit end of page */
break;
page_offset += len;
}
}
#define check_xor() do { \
if (count == MAX_XOR_BLOCKS) { \
xor_blocks(count, STRIPE_SIZE, dest, ptr);\
count = 0; \
} \
} while(0)
static void compute_parity6(struct stripe_head *sh, int method)
{
raid5_conf_t *conf = sh->raid_conf;
int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
struct bio *chosen;
/**** FIX THIS: This could be very bad if disks is close to 256 ****/
void *ptrs[syndrome_disks+2];
pd_idx = sh->pd_idx;
qd_idx = sh->qd_idx;
d0_idx = raid6_d0(sh);
pr_debug("compute_parity, stripe %llu, method %d\n",
(unsigned long long)sh->sector, method);
switch(method) {
case READ_MODIFY_WRITE:
BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */
case RECONSTRUCT_WRITE:
for (i= disks; i-- ;)
if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
chosen = sh->dev[i].towrite;
sh->dev[i].towrite = NULL;
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
wake_up(&conf->wait_for_overlap);
BUG_ON(sh->dev[i].written);
sh->dev[i].written = chosen;
}
break;
case CHECK_PARITY:
BUG(); /* Not implemented yet */
}
for (i = disks; i--;)
if (sh->dev[i].written) {
sector_t sector = sh->dev[i].sector;
struct bio *wbi = sh->dev[i].written;
while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
copy_data(1, wbi, sh->dev[i].page, sector);
wbi = r5_next_bio(wbi, sector);
}
set_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(R5_UPTODATE, &sh->dev[i].flags);
}
/* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
for (i = 0; i < disks; i++)
ptrs[i] = (void *)raid6_empty_zero_page;
count = 0;
i = d0_idx;
do {
int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
ptrs[slot] = page_address(sh->dev[i].page);
if (slot < syndrome_disks &&
!test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
printk(KERN_ERR "block %d/%d not uptodate "
"on parity calc\n", i, count);
BUG();
}
i = raid6_next_disk(i, disks);
} while (i != d0_idx);
BUG_ON(count != syndrome_disks);
raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
switch(method) {
case RECONSTRUCT_WRITE:
set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
set_bit(R5_LOCKED, &sh->dev[qd_idx].flags);
break;
case UPDATE_PARITY:
set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
break;
}
}
/* Compute one missing block */
static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
{
int i, count, disks = sh->disks;
void *ptr[MAX_XOR_BLOCKS], *dest, *p;
int qd_idx = sh->qd_idx;
pr_debug("compute_block_1, stripe %llu, idx %d\n",
(unsigned long long)sh->sector, dd_idx);
if ( dd_idx == qd_idx ) {
/* We're actually computing the Q drive */
compute_parity6(sh, UPDATE_PARITY);
} else {
dest = page_address(sh->dev[dd_idx].page);
if (!nozero) memset(dest, 0, STRIPE_SIZE);
count = 0;
for (i = disks ; i--; ) {
if (i == dd_idx || i == qd_idx)
continue;
p = page_address(sh->dev[i].page);
if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
ptr[count++] = p;
else
printk("compute_block() %d, stripe %llu, %d"
" not present\n", dd_idx,
(unsigned long long)sh->sector, i);
check_xor();
}
if (count)
xor_blocks(count, STRIPE_SIZE, dest, ptr);
if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
}
}
/* Compute two missing blocks */
static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
{
int i, count, disks = sh->disks;
int syndrome_disks = sh->ddf_layout ? disks : disks-2;
int d0_idx = raid6_d0(sh);
int faila = -1, failb = -1;
/**** FIX THIS: This could be very bad if disks is close to 256 ****/
void *ptrs[syndrome_disks+2];
for (i = 0; i < disks ; i++)
ptrs[i] = (void *)raid6_empty_zero_page;
count = 0;
i = d0_idx;
do {
int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
ptrs[slot] = page_address(sh->dev[i].page);
if (i == dd_idx1)
faila = slot;
if (i == dd_idx2)
failb = slot;
i = raid6_next_disk(i, disks);
} while (i != d0_idx);
BUG_ON(count != syndrome_disks);
BUG_ON(faila == failb);
if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
(unsigned long long)sh->sector, dd_idx1, dd_idx2,
faila, failb);
if (failb == syndrome_disks+1) {
/* Q disk is one of the missing disks */
if (faila == syndrome_disks) {
/* Missing P+Q, just recompute */
compute_parity6(sh, UPDATE_PARITY);
return;
} else {
/* We're missing D+Q; recompute D from P */
compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
dd_idx2 : dd_idx1),
0);
compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
return;
}
}
/* We're missing D+P or D+D; */
if (failb == syndrome_disks) {
/* We're missing D+P. */
raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
} else {
/* We're missing D+D. */
raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
ptrs);
}
/* Both the above update both missing blocks */
set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
}
static void static void
schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
int rcw, int expand) int rcw, int expand)
{ {
int i, pd_idx = sh->pd_idx, disks = sh->disks; int i, pd_idx = sh->pd_idx, disks = sh->disks;
raid5_conf_t *conf = sh->raid_conf;
int level = conf->level;
if (rcw) { if (rcw) {
/* if we are not expanding this is a proper write request, and /* if we are not expanding this is a proper write request, and
...@@ -1858,7 +1947,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, ...@@ -1858,7 +1947,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
} else } else
sh->reconstruct_state = reconstruct_state_run; sh->reconstruct_state = reconstruct_state_run;
set_bit(STRIPE_OP_POSTXOR, &s->ops_request); set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
for (i = disks; i--; ) { for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
...@@ -1871,17 +1960,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, ...@@ -1871,17 +1960,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
s->locked++; s->locked++;
} }
} }
if (s->locked + 1 == disks) if (s->locked + conf->max_degraded == disks)
if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
atomic_inc(&sh->raid_conf->pending_full_writes); atomic_inc(&conf->pending_full_writes);
} else { } else {
BUG_ON(level == 6);
BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
sh->reconstruct_state = reconstruct_state_prexor_drain_run; sh->reconstruct_state = reconstruct_state_prexor_drain_run;
set_bit(STRIPE_OP_PREXOR, &s->ops_request); set_bit(STRIPE_OP_PREXOR, &s->ops_request);
set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
set_bit(STRIPE_OP_POSTXOR, &s->ops_request); set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
for (i = disks; i--; ) { for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
...@@ -1899,13 +1989,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, ...@@ -1899,13 +1989,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
} }
} }
/* keep the parity disk locked while asynchronous operations /* keep the parity disk(s) locked while asynchronous operations
* are in flight * are in flight
*/ */
set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
s->locked++; s->locked++;
if (level == 6) {
int qd_idx = sh->qd_idx;
struct r5dev *dev = &sh->dev[qd_idx];
set_bit(R5_LOCKED, &dev->flags);
clear_bit(R5_UPTODATE, &dev->flags);
s->locked++;
}
pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
__func__, (unsigned long long)sh->sector, __func__, (unsigned long long)sh->sector,
s->locked, s->ops_request); s->locked, s->ops_request);
...@@ -1986,13 +2085,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in ...@@ -1986,13 +2085,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
static void end_reshape(raid5_conf_t *conf); static void end_reshape(raid5_conf_t *conf);
static int page_is_zero(struct page *p)
{
char *a = page_address(p);
return ((*(u32*)a) == 0 &&
memcmp(a, a+4, STRIPE_SIZE-4)==0);
}
static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
struct stripe_head *sh) struct stripe_head *sh)
{ {
...@@ -2133,9 +2225,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s, ...@@ -2133,9 +2225,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
set_bit(R5_Wantcompute, &dev->flags); set_bit(R5_Wantcompute, &dev->flags);
sh->ops.target = disk_idx; sh->ops.target = disk_idx;
sh->ops.target2 = -1;
s->req_compute = 1; s->req_compute = 1;
/* Careful: from this point on 'uptodate' is in the eye /* Careful: from this point on 'uptodate' is in the eye
* of raid5_run_ops which services 'compute' operations * of raid_run_ops which services 'compute' operations
* before writes. R5_Wantcompute flags a block that will * before writes. R5_Wantcompute flags a block that will
* be R5_UPTODATE by the time it is needed for a * be R5_UPTODATE by the time it is needed for a
* subsequent operation. * subsequent operation.
...@@ -2174,41 +2267,56 @@ static void handle_stripe_fill5(struct stripe_head *sh, ...@@ -2174,41 +2267,56 @@ static void handle_stripe_fill5(struct stripe_head *sh,
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
} }
static void handle_stripe_fill6(struct stripe_head *sh, /* fetch_block6 - checks the given member device to see if its data needs
struct stripe_head_state *s, struct r6_state *r6s, * to be read or computed to satisfy a request.
int disks) *
* Returns 1 when no more member devices need to be checked, otherwise returns
* 0 to tell the loop in handle_stripe_fill6 to continue
*/
static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
struct r6_state *r6s, int disk_idx, int disks)
{ {
int i; struct r5dev *dev = &sh->dev[disk_idx];
for (i = disks; i--; ) { struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
struct r5dev *dev = &sh->dev[i]; &sh->dev[r6s->failed_num[1]] };
if (!test_bit(R5_LOCKED, &dev->flags) && if (!test_bit(R5_LOCKED, &dev->flags) &&
!test_bit(R5_UPTODATE, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
(dev->toread || (dev->towrite && (dev->toread ||
!test_bit(R5_OVERWRITE, &dev->flags)) || (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
s->syncing || s->expanding || s->syncing || s->expanding ||
(s->failed >= 1 && (s->failed >= 1 &&
(sh->dev[r6s->failed_num[0]].toread || (fdev[0]->toread || s->to_write)) ||
s->to_write)) ||
(s->failed >= 2 && (s->failed >= 2 &&
(sh->dev[r6s->failed_num[1]].toread || (fdev[1]->toread || s->to_write)))) {
s->to_write)))) { /* we would like to get this block, possibly by computing it,
/* we would like to get this block, possibly * otherwise read it if the backing disk is insync
* by computing it, but we might not be able to
*/ */
BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
BUG_ON(test_bit(R5_Wantread, &dev->flags));
if ((s->uptodate == disks - 1) && if ((s->uptodate == disks - 1) &&
(s->failed && (i == r6s->failed_num[0] || (s->failed && (disk_idx == r6s->failed_num[0] ||
i == r6s->failed_num[1]))) { disk_idx == r6s->failed_num[1]))) {
/* have disk failed, and we're requested to fetch it;
* do compute it
*/
pr_debug("Computing stripe %llu block %d\n", pr_debug("Computing stripe %llu block %d\n",
(unsigned long long)sh->sector, i); (unsigned long long)sh->sector, disk_idx);
compute_block_1(sh, i, 0); set_bit(STRIPE_COMPUTE_RUN, &sh->state);
set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
set_bit(R5_Wantcompute, &dev->flags);
sh->ops.target = disk_idx;
sh->ops.target2 = -1; /* no 2nd target */
s->req_compute = 1;
s->uptodate++; s->uptodate++;
} else if ( s->uptodate == disks-2 && s->failed >= 2 ) { return 1;
} else if (s->uptodate == disks-2 && s->failed >= 2) {
/* Computing 2-failure is *very* expensive; only /* Computing 2-failure is *very* expensive; only
* do it if failed >= 2 * do it if failed >= 2
*/ */
int other; int other;
for (other = disks; other--; ) { for (other = disks; other--; ) {
if (other == i) if (other == disk_idx)
continue; continue;
if (!test_bit(R5_UPTODATE, if (!test_bit(R5_UPTODATE,
&sh->dev[other].flags)) &sh->dev[other].flags))
...@@ -2217,18 +2325,46 @@ static void handle_stripe_fill6(struct stripe_head *sh, ...@@ -2217,18 +2325,46 @@ static void handle_stripe_fill6(struct stripe_head *sh,
BUG_ON(other < 0); BUG_ON(other < 0);
pr_debug("Computing stripe %llu blocks %d,%d\n", pr_debug("Computing stripe %llu blocks %d,%d\n",
(unsigned long long)sh->sector, (unsigned long long)sh->sector,
i, other); disk_idx, other);
compute_block_2(sh, i, other); set_bit(STRIPE_COMPUTE_RUN, &sh->state);
set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
set_bit(R5_Wantcompute, &sh->dev[other].flags);
sh->ops.target = disk_idx;
sh->ops.target2 = other;
s->uptodate += 2; s->uptodate += 2;
s->req_compute = 1;
return 1;
} else if (test_bit(R5_Insync, &dev->flags)) { } else if (test_bit(R5_Insync, &dev->flags)) {
set_bit(R5_LOCKED, &dev->flags); set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags); set_bit(R5_Wantread, &dev->flags);
s->locked++; s->locked++;
pr_debug("Reading block %d (sync=%d)\n", pr_debug("Reading block %d (sync=%d)\n",
i, s->syncing); disk_idx, s->syncing);
}
} }
} }
return 0;
}
/**
* handle_stripe_fill6 - read or compute data to satisfy pending requests.
*/
static void handle_stripe_fill6(struct stripe_head *sh,
struct stripe_head_state *s, struct r6_state *r6s,
int disks)
{
int i;
/* look for blocks to read/compute, skip this if a compute
* is already in flight, or if the stripe contents are in the
* midst of changing due to a write
*/
if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
!sh->reconstruct_state)
for (i = disks; i--; )
if (fetch_block6(sh, s, r6s, i, disks))
break;
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
} }
...@@ -2362,52 +2498,37 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf, ...@@ -2362,52 +2498,37 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf,
*/ */
/* since handle_stripe can be called at any time we need to handle the /* since handle_stripe can be called at any time we need to handle the
* case where a compute block operation has been submitted and then a * case where a compute block operation has been submitted and then a
* subsequent call wants to start a write request. raid5_run_ops only * subsequent call wants to start a write request. raid_run_ops only
* handles the case where compute block and postxor are requested * handles the case where compute block and reconstruct are requested
* simultaneously. If this is not the case then new writes need to be * simultaneously. If this is not the case then new writes need to be
* held off until the compute completes. * held off until the compute completes.
*/ */
if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
(s->locked == 0 && (rcw == 0 || rmw == 0) && (s->locked == 0 && (rcw == 0 || rmw == 0) &&
!test_bit(STRIPE_BIT_DELAY, &sh->state))) !test_bit(STRIPE_BIT_DELAY, &sh->state)))
schedule_reconstruction5(sh, s, rcw == 0, 0); schedule_reconstruction(sh, s, rcw == 0, 0);
} }
static void handle_stripe_dirtying6(raid5_conf_t *conf, static void handle_stripe_dirtying6(raid5_conf_t *conf,
struct stripe_head *sh, struct stripe_head_state *s, struct stripe_head *sh, struct stripe_head_state *s,
struct r6_state *r6s, int disks) struct r6_state *r6s, int disks)
{ {
int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; int rcw = 0, pd_idx = sh->pd_idx, i;
int qd_idx = sh->qd_idx; int qd_idx = sh->qd_idx;
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
/* Would I have to read this buffer for reconstruct_write */
if (!test_bit(R5_OVERWRITE, &dev->flags)
&& i != pd_idx && i != qd_idx
&& (!test_bit(R5_LOCKED, &dev->flags)
) &&
!test_bit(R5_UPTODATE, &dev->flags)) {
if (test_bit(R5_Insync, &dev->flags)) rcw++;
else {
pr_debug("raid6: must_compute: "
"disk %d flags=%#lx\n", i, dev->flags);
must_compute++;
}
}
}
pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
(unsigned long long)sh->sector, rcw, must_compute);
set_bit(STRIPE_HANDLE, &sh->state);
if (rcw > 0) set_bit(STRIPE_HANDLE, &sh->state);
/* want reconstruct write, but need to get some data */
for (i = disks; i--; ) { for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
if (!test_bit(R5_OVERWRITE, &dev->flags) /* check if we haven't enough data */
&& !(s->failed == 0 && (i == pd_idx || i == qd_idx)) if (!test_bit(R5_OVERWRITE, &dev->flags) &&
&& !test_bit(R5_LOCKED, &dev->flags) && i != pd_idx && i != qd_idx &&
!test_bit(R5_UPTODATE, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
test_bit(R5_Insync, &dev->flags)) { !(test_bit(R5_UPTODATE, &dev->flags) ||
test_bit(R5_Wantcompute, &dev->flags))) {
rcw++;
if (!test_bit(R5_Insync, &dev->flags))
continue; /* it's a failed drive */
if ( if (
test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
pr_debug("Read_old stripe %llu " pr_debug("Read_old stripe %llu "
...@@ -2425,52 +2546,14 @@ static void handle_stripe_dirtying6(raid5_conf_t *conf, ...@@ -2425,52 +2546,14 @@ static void handle_stripe_dirtying6(raid5_conf_t *conf,
} }
} }
} }
/* now if nothing is locked, and if we have enough data, we can start a /* now if nothing is locked, and if we have enough data, we can start a
* write request * write request
*/ */
if (s->locked == 0 && rcw == 0 && if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
!test_bit(STRIPE_BIT_DELAY, &sh->state)) { s->locked == 0 && rcw == 0 &&
if (must_compute > 0) { !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
/* We have failed blocks and need to compute them */ schedule_reconstruction(sh, s, 1, 0);
switch (s->failed) { }
case 0:
BUG();
case 1:
compute_block_1(sh, r6s->failed_num[0], 0);
break;
case 2:
compute_block_2(sh, r6s->failed_num[0],
r6s->failed_num[1]);
break;
default: /* This request should have been failed? */
BUG();
}
}
pr_debug("Computing parity for stripe %llu\n",
(unsigned long long)sh->sector);
compute_parity6(sh, RECONSTRUCT_WRITE);
/* now every locked buffer is ready to be written */
for (i = disks; i--; )
if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
pr_debug("Writing stripe %llu block %d\n",
(unsigned long long)sh->sector, i);
s->locked++;
set_bit(R5_Wantwrite, &sh->dev[i].flags);
}
if (s->locked == disks)
if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
atomic_inc(&conf->pending_full_writes);
/* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
set_bit(STRIPE_INSYNC, &sh->state);
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
atomic_dec(&conf->preread_active_stripes);
if (atomic_read(&conf->preread_active_stripes) <
IO_THRESHOLD)
md_wakeup_thread(conf->mddev->thread);
}
}
} }
static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
...@@ -2528,7 +2611,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, ...@@ -2528,7 +2611,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
* we are done. Otherwise update the mismatch count and repair * we are done. Otherwise update the mismatch count and repair
* parity if !MD_RECOVERY_CHECK * parity if !MD_RECOVERY_CHECK
*/ */
if (sh->ops.zero_sum_result == 0) if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
/* parity is correct (on disc, /* parity is correct (on disc,
* not in buffer any more) * not in buffer any more)
*/ */
...@@ -2545,6 +2628,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, ...@@ -2545,6 +2628,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
set_bit(R5_Wantcompute, set_bit(R5_Wantcompute,
&sh->dev[sh->pd_idx].flags); &sh->dev[sh->pd_idx].flags);
sh->ops.target = sh->pd_idx; sh->ops.target = sh->pd_idx;
sh->ops.target2 = -1;
s->uptodate++; s->uptodate++;
} }
} }
...@@ -2562,66 +2646,73 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, ...@@ -2562,66 +2646,73 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
struct stripe_head_state *s, struct stripe_head_state *s,
struct r6_state *r6s, struct page *tmp_page, struct r6_state *r6s, int disks)
int disks)
{ {
int update_p = 0, update_q = 0;
struct r5dev *dev;
int pd_idx = sh->pd_idx; int pd_idx = sh->pd_idx;
int qd_idx = sh->qd_idx; int qd_idx = sh->qd_idx;
struct r5dev *dev;
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
BUG_ON(s->failed > 2); BUG_ON(s->failed > 2);
BUG_ON(s->uptodate < disks);
/* Want to check and possibly repair P and Q. /* Want to check and possibly repair P and Q.
* However there could be one 'failed' device, in which * However there could be one 'failed' device, in which
* case we can only check one of them, possibly using the * case we can only check one of them, possibly using the
* other to generate missing data * other to generate missing data
*/ */
/* If !tmp_page, we cannot do the calculations, switch (sh->check_state) {
* but as we have set STRIPE_HANDLE, we will soon be called case check_state_idle:
* by stripe_handle with a tmp_page - just wait until then. /* start a new check operation if there are < 2 failures */
*/
if (tmp_page) {
if (s->failed == r6s->q_failed) { if (s->failed == r6s->q_failed) {
/* The only possible failed device holds 'Q', so it /* The only possible failed device holds Q, so it
* makes sense to check P (If anything else were failed, * makes sense to check P (If anything else were failed,
* we would have used P to recreate it). * we would have used P to recreate it).
*/ */
compute_block_1(sh, pd_idx, 1); sh->check_state = check_state_run;
if (!page_is_zero(sh->dev[pd_idx].page)) {
compute_block_1(sh, pd_idx, 0);
update_p = 1;
}
} }
if (!r6s->q_failed && s->failed < 2) { if (!r6s->q_failed && s->failed < 2) {
/* q is not failed, and we didn't use it to generate /* Q is not failed, and we didn't use it to generate
* anything, so it makes sense to check it * anything, so it makes sense to check it
*/ */
memcpy(page_address(tmp_page), if (sh->check_state == check_state_run)
page_address(sh->dev[qd_idx].page), sh->check_state = check_state_run_pq;
STRIPE_SIZE); else
compute_parity6(sh, UPDATE_PARITY); sh->check_state = check_state_run_q;
if (memcmp(page_address(tmp_page),
page_address(sh->dev[qd_idx].page),
STRIPE_SIZE) != 0) {
clear_bit(STRIPE_INSYNC, &sh->state);
update_q = 1;
} }
/* discard potentially stale zero_sum_result */
sh->ops.zero_sum_result = 0;
if (sh->check_state == check_state_run) {
/* async_xor_zero_sum destroys the contents of P */
clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
s->uptodate--;
} }
if (update_p || update_q) { if (sh->check_state >= check_state_run &&
conf->mddev->resync_mismatches += STRIPE_SECTORS; sh->check_state <= check_state_run_pq) {
if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) /* async_syndrome_zero_sum preserves P and Q, so
/* don't try to repair!! */ * no need to mark them !uptodate here
update_p = update_q = 0; */
set_bit(STRIPE_OP_CHECK, &s->ops_request);
break;
} }
/* we have 2-disk failure */
BUG_ON(s->failed != 2);
/* fall through */
case check_state_compute_result:
sh->check_state = check_state_idle;
/* check that a write has not made the stripe insync */
if (test_bit(STRIPE_INSYNC, &sh->state))
break;
/* now write out any block on a failed drive, /* now write out any block on a failed drive,
* or P or Q if they need it * or P or Q if they were recomputed
*/ */
BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
if (s->failed == 2) { if (s->failed == 2) {
dev = &sh->dev[r6s->failed_num[1]]; dev = &sh->dev[r6s->failed_num[1]];
s->locked++; s->locked++;
...@@ -2634,14 +2725,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, ...@@ -2634,14 +2725,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
set_bit(R5_LOCKED, &dev->flags); set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags); set_bit(R5_Wantwrite, &dev->flags);
} }
if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
if (update_p) {
dev = &sh->dev[pd_idx]; dev = &sh->dev[pd_idx];
s->locked++; s->locked++;
set_bit(R5_LOCKED, &dev->flags); set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags); set_bit(R5_Wantwrite, &dev->flags);
} }
if (update_q) { if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
dev = &sh->dev[qd_idx]; dev = &sh->dev[qd_idx];
s->locked++; s->locked++;
set_bit(R5_LOCKED, &dev->flags); set_bit(R5_LOCKED, &dev->flags);
...@@ -2650,6 +2740,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, ...@@ -2650,6 +2740,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
clear_bit(STRIPE_DEGRADED, &sh->state); clear_bit(STRIPE_DEGRADED, &sh->state);
set_bit(STRIPE_INSYNC, &sh->state); set_bit(STRIPE_INSYNC, &sh->state);
break;
case check_state_run:
case check_state_run_q:
case check_state_run_pq:
break; /* we will be called again upon completion */
case check_state_check_result:
sh->check_state = check_state_idle;
/* handle a successful check operation, if parity is correct
* we are done. Otherwise update the mismatch count and repair
* parity if !MD_RECOVERY_CHECK
*/
if (sh->ops.zero_sum_result == 0) {
/* both parities are correct */
if (!s->failed)
set_bit(STRIPE_INSYNC, &sh->state);
else {
/* in contrast to the raid5 case we can validate
* parity, but still have a failure to write
* back
*/
sh->check_state = check_state_compute_result;
/* Returning at this point means that we may go
* off and bring p and/or q uptodate again so
* we make sure to check zero_sum_result again
* to verify if p or q need writeback
*/
}
} else {
conf->mddev->resync_mismatches += STRIPE_SECTORS;
if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
/* don't try to repair!! */
set_bit(STRIPE_INSYNC, &sh->state);
else {
int *target = &sh->ops.target;
sh->ops.target = -1;
sh->ops.target2 = -1;
sh->check_state = check_state_compute_run;
set_bit(STRIPE_COMPUTE_RUN, &sh->state);
set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
set_bit(R5_Wantcompute,
&sh->dev[pd_idx].flags);
*target = pd_idx;
target = &sh->ops.target2;
s->uptodate++;
}
if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
set_bit(R5_Wantcompute,
&sh->dev[qd_idx].flags);
*target = qd_idx;
s->uptodate++;
}
}
}
break;
case check_state_compute_run:
break;
default:
printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
__func__, sh->check_state,
(unsigned long long) sh->sector);
BUG();
} }
} }
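The state machine above only advances when the asynchronous check operation it scheduled reports back. As a reading aid, here is a minimal sketch of what that completion hook does; the name ops_complete_check_sketch() and the exact wiring are illustrative (the real callback is the ops_complete_check() path in raid5.c, outside this excerpt):

static void ops_complete_check_sketch(void *stripe_head_ref)
{
	struct stripe_head *sh = stripe_head_ref;

	pr_debug("%s: stripe %llu\n", __func__,
		 (unsigned long long)sh->sector);

	/* hand the zero-sum result back to handle_parity_checks6() */
	sh->check_state = check_state_check_result;
	set_bit(STRIPE_HANDLE, &sh->state);
	release_stripe(sh);
}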
...@@ -2667,6 +2821,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, ...@@ -2667,6 +2821,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
if (i != sh->pd_idx && i != sh->qd_idx) { if (i != sh->pd_idx && i != sh->qd_idx) {
int dd_idx, j; int dd_idx, j;
struct stripe_head *sh2; struct stripe_head *sh2;
struct async_submit_ctl submit;
sector_t bn = compute_blocknr(sh, i, 1); sector_t bn = compute_blocknr(sh, i, 1);
sector_t s = raid5_compute_sector(conf, bn, 0, sector_t s = raid5_compute_sector(conf, bn, 0,
...@@ -2686,9 +2841,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, ...@@ -2686,9 +2841,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
} }
/* place all the copies on one channel */ /* place all the copies on one channel */
init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
tx = async_memcpy(sh2->dev[dd_idx].page, tx = async_memcpy(sh2->dev[dd_idx].page,
sh->dev[i].page, 0, 0, STRIPE_SIZE, sh->dev[i].page, 0, 0, STRIPE_SIZE,
ASYNC_TX_DEP_ACK, tx, NULL, NULL); &submit);
set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
...@@ -2974,7 +3130,7 @@ static bool handle_stripe5(struct stripe_head *sh) ...@@ -2974,7 +3130,7 @@ static bool handle_stripe5(struct stripe_head *sh)
/* Need to write out all blocks after computing parity */ /* Need to write out all blocks after computing parity */
sh->disks = conf->raid_disks; sh->disks = conf->raid_disks;
stripe_set_idx(sh->sector, conf, 0, sh); stripe_set_idx(sh->sector, conf, 0, sh);
schedule_reconstruction5(sh, &s, 1, 1); schedule_reconstruction(sh, &s, 1, 1);
} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
clear_bit(STRIPE_EXPAND_READY, &sh->state); clear_bit(STRIPE_EXPAND_READY, &sh->state);
atomic_dec(&conf->reshape_stripes); atomic_dec(&conf->reshape_stripes);
...@@ -2994,7 +3150,7 @@ static bool handle_stripe5(struct stripe_head *sh) ...@@ -2994,7 +3150,7 @@ static bool handle_stripe5(struct stripe_head *sh)
md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
if (s.ops_request) if (s.ops_request)
raid5_run_ops(sh, s.ops_request); raid_run_ops(sh, s.ops_request);
ops_run_io(sh, &s); ops_run_io(sh, &s);
...@@ -3003,7 +3159,7 @@ static bool handle_stripe5(struct stripe_head *sh) ...@@ -3003,7 +3159,7 @@ static bool handle_stripe5(struct stripe_head *sh)
return blocked_rdev == NULL; return blocked_rdev == NULL;
} }
static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) static bool handle_stripe6(struct stripe_head *sh)
{ {
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks; int disks = sh->disks;
...@@ -3015,9 +3171,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) ...@@ -3015,9 +3171,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
mdk_rdev_t *blocked_rdev = NULL; mdk_rdev_t *blocked_rdev = NULL;
pr_debug("handling stripe %llu, state=%#lx cnt=%d, " pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
"pd_idx=%d, qd_idx=%d\n", "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
(unsigned long long)sh->sector, sh->state, (unsigned long long)sh->sector, sh->state,
atomic_read(&sh->count), pd_idx, qd_idx); atomic_read(&sh->count), pd_idx, qd_idx,
sh->check_state, sh->reconstruct_state);
memset(&s, 0, sizeof(s)); memset(&s, 0, sizeof(s));
spin_lock(&sh->lock); spin_lock(&sh->lock);
...@@ -3037,35 +3194,24 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) ...@@ -3037,35 +3194,24 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
pr_debug("check %d: state 0x%lx read %p write %p written %p\n", pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
i, dev->flags, dev->toread, dev->towrite, dev->written); i, dev->flags, dev->toread, dev->towrite, dev->written);
/* maybe we can reply to a read */ /* maybe we can reply to a read
if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { *
struct bio *rbi, *rbi2; * new wantfill requests are only permitted while
pr_debug("Return read for disc %d\n", i); * ops_complete_biofill is guaranteed to be inactive
spin_lock_irq(&conf->device_lock); */
rbi = dev->toread; if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
dev->toread = NULL; !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
if (test_and_clear_bit(R5_Overlap, &dev->flags)) set_bit(R5_Wantfill, &dev->flags);
wake_up(&conf->wait_for_overlap);
spin_unlock_irq(&conf->device_lock);
while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
copy_data(0, rbi, dev->page, dev->sector);
rbi2 = r5_next_bio(rbi, dev->sector);
spin_lock_irq(&conf->device_lock);
if (!raid5_dec_bi_phys_segments(rbi)) {
rbi->bi_next = return_bi;
return_bi = rbi;
}
spin_unlock_irq(&conf->device_lock);
rbi = rbi2;
}
}
/* now count some things */ /* now count some things */
if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
if (test_bit(R5_Wantcompute, &dev->flags))
BUG_ON(++s.compute > 2);
if (test_bit(R5_Wantfill, &dev->flags)) {
if (dev->toread) s.to_fill++;
} else if (dev->toread)
s.to_read++; s.to_read++;
if (dev->towrite) { if (dev->towrite) {
s.to_write++; s.to_write++;
...@@ -3106,6 +3252,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) ...@@ -3106,6 +3252,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
blocked_rdev = NULL; blocked_rdev = NULL;
} }
if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
set_bit(STRIPE_BIOFILL_RUN, &sh->state);
}
pr_debug("locked=%d uptodate=%d to_read=%d" pr_debug("locked=%d uptodate=%d to_read=%d"
" to_write=%d failed=%d failed_num=%d,%d\n", " to_write=%d failed=%d failed_num=%d,%d\n",
s.locked, s.uptodate, s.to_read, s.to_write, s.failed, s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
...@@ -3146,19 +3297,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) ...@@ -3146,19 +3297,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
* or to load a block that is being partially written. * or to load a block that is being partially written.
*/ */
if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
(s.syncing && (s.uptodate < disks)) || s.expanding) (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
handle_stripe_fill6(sh, &s, &r6s, disks); handle_stripe_fill6(sh, &s, &r6s, disks);
/* now to consider writing and what else, if anything should be read */ /* Now we check to see if any write operations have recently
if (s.to_write) * completed
*/
if (sh->reconstruct_state == reconstruct_state_drain_result) {
int qd_idx = sh->qd_idx;
sh->reconstruct_state = reconstruct_state_idle;
/* All the 'written' buffers and the parity blocks are ready to
* be written back to disk
*/
BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
for (i = disks; i--; ) {
dev = &sh->dev[i];
if (test_bit(R5_LOCKED, &dev->flags) &&
(i == sh->pd_idx || i == qd_idx ||
dev->written)) {
pr_debug("Writing block %d\n", i);
BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
set_bit(R5_Wantwrite, &dev->flags);
if (!test_bit(R5_Insync, &dev->flags) ||
((i == sh->pd_idx || i == qd_idx) &&
s.failed == 0))
set_bit(STRIPE_INSYNC, &sh->state);
}
}
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
atomic_dec(&conf->preread_active_stripes);
if (atomic_read(&conf->preread_active_stripes) <
IO_THRESHOLD)
md_wakeup_thread(conf->mddev->thread);
}
}
/* Now to consider new write requests and what else, if anything
* should be read. We do not handle new writes when:
* 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
* 2/ A 'check' operation is in flight, as it may clobber the parity
* block.
*/
if (s.to_write && !sh->reconstruct_state && !sh->check_state)
handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
/* maybe we need to check and possibly fix the parity for this stripe /* maybe we need to check and possibly fix the parity for this stripe
* Any reads will already have been scheduled, so we just see if enough * Any reads will already have been scheduled, so we just see if enough
* data is available * data is available. The parity check is held off while parity
* dependent operations are in flight.
*/ */
if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) if (sh->check_state ||
handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks); (s.syncing && s.locked == 0 &&
!test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
!test_bit(STRIPE_INSYNC, &sh->state)))
handle_parity_checks6(conf, sh, &s, &r6s, disks);
if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
md_done_sync(conf->mddev, STRIPE_SECTORS,1); md_done_sync(conf->mddev, STRIPE_SECTORS,1);
...@@ -3179,15 +3373,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) ...@@ -3179,15 +3373,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
set_bit(R5_Wantwrite, &dev->flags); set_bit(R5_Wantwrite, &dev->flags);
set_bit(R5_ReWrite, &dev->flags); set_bit(R5_ReWrite, &dev->flags);
set_bit(R5_LOCKED, &dev->flags); set_bit(R5_LOCKED, &dev->flags);
s.locked++;
} else { } else {
/* let's read it back */ /* let's read it back */
set_bit(R5_Wantread, &dev->flags); set_bit(R5_Wantread, &dev->flags);
set_bit(R5_LOCKED, &dev->flags); set_bit(R5_LOCKED, &dev->flags);
s.locked++;
}
} }
} }
/* Finish reconstruct operations initiated by the expansion process */
if (sh->reconstruct_state == reconstruct_state_result) {
sh->reconstruct_state = reconstruct_state_idle;
clear_bit(STRIPE_EXPANDING, &sh->state);
for (i = conf->raid_disks; i--; ) {
set_bit(R5_Wantwrite, &sh->dev[i].flags);
set_bit(R5_LOCKED, &sh->dev[i].flags);
s.locked++;
}
} }
if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
!sh->reconstruct_state) {
struct stripe_head *sh2 struct stripe_head *sh2
= get_active_stripe(conf, sh->sector, 1, 1, 1); = get_active_stripe(conf, sh->sector, 1, 1, 1);
if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
...@@ -3208,14 +3416,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) ...@@ -3208,14 +3416,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
/* Need to write out all blocks after computing P&Q */ /* Need to write out all blocks after computing P&Q */
sh->disks = conf->raid_disks; sh->disks = conf->raid_disks;
stripe_set_idx(sh->sector, conf, 0, sh); stripe_set_idx(sh->sector, conf, 0, sh);
compute_parity6(sh, RECONSTRUCT_WRITE); schedule_reconstruction(sh, &s, 1, 1);
for (i = conf->raid_disks ; i-- ; ) { } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
set_bit(R5_LOCKED, &sh->dev[i].flags);
s.locked++;
set_bit(R5_Wantwrite, &sh->dev[i].flags);
}
clear_bit(STRIPE_EXPANDING, &sh->state);
} else if (s.expanded) {
clear_bit(STRIPE_EXPAND_READY, &sh->state); clear_bit(STRIPE_EXPAND_READY, &sh->state);
atomic_dec(&conf->reshape_stripes); atomic_dec(&conf->reshape_stripes);
wake_up(&conf->wait_for_overlap); wake_up(&conf->wait_for_overlap);
...@@ -3233,6 +3435,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) ...@@ -3233,6 +3435,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
if (unlikely(blocked_rdev)) if (unlikely(blocked_rdev))
md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
if (s.ops_request)
raid_run_ops(sh, s.ops_request);
ops_run_io(sh, &s); ops_run_io(sh, &s);
return_io(return_bi); return_io(return_bi);
...@@ -3241,16 +3446,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) ...@@ -3241,16 +3446,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
} }
/* returns true if the stripe was handled */ /* returns true if the stripe was handled */
static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page) static bool handle_stripe(struct stripe_head *sh)
{ {
if (sh->raid_conf->level == 6) if (sh->raid_conf->level == 6)
return handle_stripe6(sh, tmp_page); return handle_stripe6(sh);
else else
return handle_stripe5(sh); return handle_stripe5(sh);
} }
static void raid5_activate_delayed(raid5_conf_t *conf) static void raid5_activate_delayed(raid5_conf_t *conf)
{ {
if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
...@@ -4046,7 +4249,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski ...@@ -4046,7 +4249,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
spin_unlock(&sh->lock); spin_unlock(&sh->lock);
/* wait for any blocked device to be handled */ /* wait for any blocked device to be handled */
while(unlikely(!handle_stripe(sh, NULL))) while (unlikely(!handle_stripe(sh)))
; ;
release_stripe(sh); release_stripe(sh);
...@@ -4103,7 +4306,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) ...@@ -4103,7 +4306,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
return handled; return handled;
} }
handle_stripe(sh, NULL); handle_stripe(sh);
release_stripe(sh); release_stripe(sh);
handled++; handled++;
} }
...@@ -4117,6 +4320,36 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) ...@@ -4117,6 +4320,36 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
return handled; return handled;
} }
#ifdef CONFIG_MULTICORE_RAID456
static void __process_stripe(void *param, async_cookie_t cookie)
{
struct stripe_head *sh = param;
handle_stripe(sh);
release_stripe(sh);
}
static void process_stripe(struct stripe_head *sh, struct list_head *domain)
{
async_schedule_domain(__process_stripe, sh, domain);
}
static void synchronize_stripe_processing(struct list_head *domain)
{
async_synchronize_full_domain(domain);
}
#else
static void process_stripe(struct stripe_head *sh, struct list_head *domain)
{
handle_stripe(sh);
release_stripe(sh);
cond_resched();
}
static void synchronize_stripe_processing(struct list_head *domain)
{
}
#endif
/* /*
...@@ -4131,6 +4364,7 @@ static void raid5d(mddev_t *mddev) ...@@ -4131,6 +4364,7 @@ static void raid5d(mddev_t *mddev)
struct stripe_head *sh; struct stripe_head *sh;
raid5_conf_t *conf = mddev_to_conf(mddev); raid5_conf_t *conf = mddev_to_conf(mddev);
int handled; int handled;
LIST_HEAD(raid_domain);
pr_debug("+++ raid5d active\n"); pr_debug("+++ raid5d active\n");
...@@ -4167,8 +4401,7 @@ static void raid5d(mddev_t *mddev) ...@@ -4167,8 +4401,7 @@ static void raid5d(mddev_t *mddev)
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
handled++; handled++;
handle_stripe(sh, conf->spare_page); process_stripe(sh, &raid_domain);
release_stripe(sh);
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
} }
...@@ -4176,6 +4409,7 @@ static void raid5d(mddev_t *mddev) ...@@ -4176,6 +4409,7 @@ static void raid5d(mddev_t *mddev)
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
synchronize_stripe_processing(&raid_domain);
async_tx_issue_pending_all(); async_tx_issue_pending_all();
unplug_slaves(mddev); unplug_slaves(mddev);
...@@ -4308,6 +4542,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) ...@@ -4308,6 +4542,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
return sectors * (raid_disks - conf->max_degraded); return sectors * (raid_disks - conf->max_degraded);
} }
static void raid5_free_percpu(raid5_conf_t *conf)
{
struct raid5_percpu *percpu;
unsigned long cpu;
if (!conf->percpu)
return;
get_online_cpus();
for_each_possible_cpu(cpu) {
percpu = per_cpu_ptr(conf->percpu, cpu);
safe_put_page(percpu->spare_page);
kfree(percpu->scribble);
}
#ifdef CONFIG_HOTPLUG_CPU
unregister_cpu_notifier(&conf->cpu_notify);
#endif
put_online_cpus();
free_percpu(conf->percpu);
}
static void free_conf(raid5_conf_t *conf)
{
shrink_stripes(conf);
raid5_free_percpu(conf);
kfree(conf->disks);
kfree(conf->stripe_hashtbl);
kfree(conf);
}
#ifdef CONFIG_HOTPLUG_CPU
static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
void *hcpu)
{
raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify);
long cpu = (long)hcpu;
struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
if (conf->level == 6 && !percpu->spare_page)
percpu->spare_page = alloc_page(GFP_KERNEL);
if (!percpu->scribble)
percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
if (!percpu->scribble ||
(conf->level == 6 && !percpu->spare_page)) {
safe_put_page(percpu->spare_page);
kfree(percpu->scribble);
pr_err("%s: failed memory allocation for cpu%ld\n",
__func__, cpu);
return NOTIFY_BAD;
}
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
safe_put_page(percpu->spare_page);
kfree(percpu->scribble);
percpu->spare_page = NULL;
percpu->scribble = NULL;
break;
default:
break;
}
return NOTIFY_OK;
}
#endif
static int raid5_alloc_percpu(raid5_conf_t *conf)
{
unsigned long cpu;
struct page *spare_page;
struct raid5_percpu *allcpus;
void *scribble;
int err;
allcpus = alloc_percpu(struct raid5_percpu);
if (!allcpus)
return -ENOMEM;
conf->percpu = allcpus;
get_online_cpus();
err = 0;
for_each_present_cpu(cpu) {
if (conf->level == 6) {
spare_page = alloc_page(GFP_KERNEL);
if (!spare_page) {
err = -ENOMEM;
break;
}
per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
}
scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
if (!scribble) {
err = -ENOMEM;
break;
}
per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
}
#ifdef CONFIG_HOTPLUG_CPU
conf->cpu_notify.notifier_call = raid456_cpu_notify;
conf->cpu_notify.priority = 0;
if (err == 0)
err = register_cpu_notifier(&conf->cpu_notify);
#endif
put_online_cpus();
return err;
}
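The per-cpu buffers allocated above are meant to be used with preemption disabled, so that one stripe operation sees a stable scribble/spare_page pair. A minimal sketch of that access pattern; the helper name is illustrative (in this series the consumer is the raid_run_ops() path):

static void use_percpu_resources(raid5_conf_t *conf)
{
	struct raid5_percpu *percpu;
	unsigned long cpu;

	cpu = get_cpu();	/* pin to this cpu while borrowing its buffers */
	percpu = per_cpu_ptr(conf->percpu, cpu);
	/*
	 * percpu->scribble:   scratch space for source lists and page/dma
	 *                     address conversions
	 * percpu->spare_page: temporary P/Q destination for raid6 checks
	 */
	put_cpu();
}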
static raid5_conf_t *setup_conf(mddev_t *mddev) static raid5_conf_t *setup_conf(mddev_t *mddev)
{ {
raid5_conf_t *conf; raid5_conf_t *conf;
...@@ -4347,6 +4693,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) ...@@ -4347,6 +4693,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
goto abort; goto abort;
conf->raid_disks = mddev->raid_disks; conf->raid_disks = mddev->raid_disks;
conf->scribble_len = scribble_len(conf->raid_disks);
if (mddev->reshape_position == MaxSector) if (mddev->reshape_position == MaxSector)
conf->previous_raid_disks = mddev->raid_disks; conf->previous_raid_disks = mddev->raid_disks;
else else
...@@ -4362,11 +4709,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) ...@@ -4362,11 +4709,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
goto abort; goto abort;
if (mddev->new_level == 6) { conf->level = mddev->new_level;
conf->spare_page = alloc_page(GFP_KERNEL); if (raid5_alloc_percpu(conf) != 0)
if (!conf->spare_page)
goto abort; goto abort;
}
spin_lock_init(&conf->device_lock); spin_lock_init(&conf->device_lock);
init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_overlap); init_waitqueue_head(&conf->wait_for_overlap);
...@@ -4402,7 +4748,6 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) ...@@ -4402,7 +4748,6 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
} }
conf->chunk_size = mddev->new_chunk; conf->chunk_size = mddev->new_chunk;
conf->level = mddev->new_level;
if (conf->level == 6) if (conf->level == 6)
conf->max_degraded = 2; conf->max_degraded = 2;
else else
...@@ -4437,11 +4782,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) ...@@ -4437,11 +4782,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
abort: abort:
if (conf) { if (conf) {
shrink_stripes(conf); free_conf(conf);
safe_put_page(conf->spare_page);
kfree(conf->disks);
kfree(conf->stripe_hashtbl);
kfree(conf);
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
} else } else
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
...@@ -4607,12 +4948,8 @@ static int run(mddev_t *mddev) ...@@ -4607,12 +4948,8 @@ static int run(mddev_t *mddev)
md_unregister_thread(mddev->thread); md_unregister_thread(mddev->thread);
mddev->thread = NULL; mddev->thread = NULL;
if (conf) { if (conf) {
shrink_stripes(conf);
print_raid5_conf(conf); print_raid5_conf(conf);
safe_put_page(conf->spare_page); free_conf(conf);
kfree(conf->disks);
kfree(conf->stripe_hashtbl);
kfree(conf);
} }
mddev->private = NULL; mddev->private = NULL;
printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev));
...@@ -4627,13 +4964,10 @@ static int stop(mddev_t *mddev) ...@@ -4627,13 +4964,10 @@ static int stop(mddev_t *mddev)
md_unregister_thread(mddev->thread); md_unregister_thread(mddev->thread);
mddev->thread = NULL; mddev->thread = NULL;
shrink_stripes(conf);
kfree(conf->stripe_hashtbl);
mddev->queue->backing_dev_info.congested_fn = NULL; mddev->queue->backing_dev_info.congested_fn = NULL;
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
kfree(conf->disks); free_conf(conf);
kfree(conf);
mddev->private = NULL; mddev->private = NULL;
return 0; return 0;
} }
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#define _RAID5_H #define _RAID5_H
#include <linux/raid/xor.h> #include <linux/raid/xor.h>
#include <linux/dmaengine.h>
/* /*
* *
...@@ -175,7 +176,9 @@ ...@@ -175,7 +176,9 @@
*/ */
enum check_states { enum check_states {
check_state_idle = 0, check_state_idle = 0,
check_state_run, /* parity check */ check_state_run, /* xor parity check */
check_state_run_q, /* q-parity check */
check_state_run_pq, /* pq dual parity check */
check_state_check_result, check_state_check_result,
check_state_compute_run, /* parity repair */ check_state_compute_run, /* parity repair */
check_state_compute_result, check_state_compute_result,
...@@ -215,8 +218,8 @@ struct stripe_head { ...@@ -215,8 +218,8 @@ struct stripe_head {
* @target - STRIPE_OP_COMPUTE_BLK target * @target - STRIPE_OP_COMPUTE_BLK target
*/ */
struct stripe_operations { struct stripe_operations {
int target; int target, target2;
u32 zero_sum_result; enum sum_check_flags zero_sum_result;
} ops; } ops;
struct r5dev { struct r5dev {
struct bio req; struct bio req;
...@@ -298,7 +301,7 @@ struct r6_state { ...@@ -298,7 +301,7 @@ struct r6_state {
#define STRIPE_OP_COMPUTE_BLK 1 #define STRIPE_OP_COMPUTE_BLK 1
#define STRIPE_OP_PREXOR 2 #define STRIPE_OP_PREXOR 2
#define STRIPE_OP_BIODRAIN 3 #define STRIPE_OP_BIODRAIN 3
#define STRIPE_OP_POSTXOR 4 #define STRIPE_OP_RECONSTRUCT 4
#define STRIPE_OP_CHECK 5 #define STRIPE_OP_CHECK 5
/* /*
...@@ -383,8 +386,21 @@ struct raid5_private_data { ...@@ -383,8 +386,21 @@ struct raid5_private_data {
* (fresh device added). * (fresh device added).
* Cleared when a sync completes. * Cleared when a sync completes.
*/ */
/* per cpu variables */
struct raid5_percpu {
struct page *spare_page; /* Used when checking P/Q in raid6 */ struct page *spare_page; /* Used when checking P/Q in raid6 */
void *scribble; /* space for constructing buffer
* lists and performing address
* conversions
*/
} *percpu;
size_t scribble_len; /* size of scribble region must be
* associated with conf to handle
* cpu hotplug while reshaping
*/
#ifdef CONFIG_HOTPLUG_CPU
struct notifier_block cpu_notify;
#endif
/* /*
* Free stripes pool * Free stripes pool
......
...@@ -58,24 +58,56 @@ struct dma_chan_ref { ...@@ -58,24 +58,56 @@ struct dma_chan_ref {
* array. * array.
* @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
* dependency chain * dependency chain
* @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining.
*/ */
enum async_tx_flags { enum async_tx_flags {
ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_ZERO_DST = (1 << 0),
ASYNC_TX_XOR_DROP_DST = (1 << 1), ASYNC_TX_XOR_DROP_DST = (1 << 1),
ASYNC_TX_ACK = (1 << 3), ASYNC_TX_ACK = (1 << 2),
ASYNC_TX_DEP_ACK = (1 << 4), };
/**
* struct async_submit_ctl - async_tx submission/completion modifiers
* @flags: submission modifiers
* @depend_tx: parent dependency of the current operation being submitted
* @cb_fn: callback routine to run at operation completion
* @cb_param: parameter for the callback routine
* @scribble: caller provided space for dma/page address conversions
*/
struct async_submit_ctl {
enum async_tx_flags flags;
struct dma_async_tx_descriptor *depend_tx;
dma_async_tx_callback cb_fn;
void *cb_param;
void *scribble;
}; };
#ifdef CONFIG_DMA_ENGINE #ifdef CONFIG_DMA_ENGINE
#define async_tx_issue_pending_all dma_issue_pending_all #define async_tx_issue_pending_all dma_issue_pending_all
/**
* async_tx_issue_pending - send pending descriptor to the hardware channel
* @tx: descriptor handle to retrieve hardware context
*
* Note: any dependent operations will have already been issued by
* async_tx_channel_switch, or (in the case of no channel switch) will
* be already pending on this channel.
*/
static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
{
if (likely(tx)) {
struct dma_chan *chan = tx->chan;
struct dma_device *dma = chan->device;
dma->device_issue_pending(chan);
}
}
#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
#include <asm/async_tx.h> #include <asm/async_tx.h>
#else #else
#define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \
__async_tx_find_channel(dep, type) __async_tx_find_channel(dep, type)
struct dma_chan * struct dma_chan *
__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, __async_tx_find_channel(struct async_submit_ctl *submit,
enum dma_transaction_type tx_type); enum dma_transaction_type tx_type);
#endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */
#else #else
...@@ -84,10 +116,16 @@ static inline void async_tx_issue_pending_all(void) ...@@ -84,10 +116,16 @@ static inline void async_tx_issue_pending_all(void)
do { } while (0); do { } while (0);
} }
static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
{
do { } while (0);
}
static inline struct dma_chan * static inline struct dma_chan *
async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, async_tx_find_channel(struct async_submit_ctl *submit,
enum dma_transaction_type tx_type, struct page **dst, int dst_count, enum dma_transaction_type tx_type, struct page **dst,
struct page **src, int src_count, size_t len) int dst_count, struct page **src, int src_count,
size_t len)
{ {
return NULL; return NULL;
} }
...@@ -99,46 +137,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, ...@@ -99,46 +137,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
* @cb_fn_param: parameter to pass to the callback routine * @cb_fn_param: parameter to pass to the callback routine
*/ */
static inline void static inline void
async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) async_tx_sync_epilog(struct async_submit_ctl *submit)
{ {
if (cb_fn) if (submit->cb_fn)
cb_fn(cb_fn_param); submit->cb_fn(submit->cb_param);
} }
void typedef union {
async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, unsigned long addr;
enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, struct page *page;
dma_async_tx_callback cb_fn, void *cb_fn_param); dma_addr_t dma;
} addr_conv_t;
static inline void
init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags,
struct dma_async_tx_descriptor *tx,
dma_async_tx_callback cb_fn, void *cb_param,
addr_conv_t *scribble)
{
args->flags = flags;
args->depend_tx = tx;
args->cb_fn = cb_fn;
args->cb_param = cb_param;
args->scribble = scribble;
}
void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
struct async_submit_ctl *submit);
struct dma_async_tx_descriptor * struct dma_async_tx_descriptor *
async_xor(struct page *dest, struct page **src_list, unsigned int offset, async_xor(struct page *dest, struct page **src_list, unsigned int offset,
int src_cnt, size_t len, enum async_tx_flags flags, int src_cnt, size_t len, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
struct dma_async_tx_descriptor * struct dma_async_tx_descriptor *
async_xor_zero_sum(struct page *dest, struct page **src_list, async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
unsigned int offset, int src_cnt, size_t len, int src_cnt, size_t len, enum sum_check_flags *result,
u32 *result, enum async_tx_flags flags, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param);
struct dma_async_tx_descriptor * struct dma_async_tx_descriptor *
async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
unsigned int src_offset, size_t len, enum async_tx_flags flags, unsigned int src_offset, size_t len,
struct dma_async_tx_descriptor *depend_tx, struct async_submit_ctl *submit);
dma_async_tx_callback cb_fn, void *cb_fn_param);
struct dma_async_tx_descriptor * struct dma_async_tx_descriptor *
async_memset(struct page *dest, int val, unsigned int offset, async_memset(struct page *dest, int val, unsigned int offset,
size_t len, enum async_tx_flags flags, size_t len, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param); struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
size_t len, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
size_t len, enum sum_check_flags *pqres, struct page *spare,
struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
struct page **ptrs, struct async_submit_ctl *submit);
struct dma_async_tx_descriptor * struct dma_async_tx_descriptor *
async_trigger_callback(enum async_tx_flags flags, async_raid6_datap_recov(int src_num, size_t bytes, int faila,
struct dma_async_tx_descriptor *depend_tx, struct page **ptrs, struct async_submit_ctl *submit);
dma_async_tx_callback cb_fn, void *cb_fn_param);
void async_tx_quiesce(struct dma_async_tx_descriptor **tx); void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
#endif /* _ASYNC_TX_H_ */ #endif /* _ASYNC_TX_H_ */
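Pulling the reworked declarations together, here is a minimal sketch of generating a RAID-6 syndrome through the new submission-context API. The helper name is illustrative; it assumes blocks[] holds the data pages followed by P and Q ('disks' entries in total) and that scribble points at an addr_conv_t region of matching size:

static void generate_pq(struct page **blocks, int disks, size_t len,
			addr_conv_t *scribble)
{
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;

	/* no dependency, no completion callback; scribble gives the async
	 * layer room for page<->dma address conversions
	 */
	init_async_submit(&submit, 0, NULL, NULL, NULL, scribble);

	/* blocks[0..disks-3] are data, blocks[disks-2] is P, blocks[disks-1] is Q */
	tx = async_gen_syndrome(blocks, 0, disks, len, &submit);

	/* wait for, and implicitly ack, the operation; a caller chaining
	 * further work would instead pass tx as the dependency of the next
	 * init_async_submit() and issue everything at the end
	 */
	async_tx_quiesce(&tx);
}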
...@@ -52,11 +52,11 @@ enum dma_status {
enum dma_transaction_type {
DMA_MEMCPY,
DMA_XOR,
-DMA_PQ_XOR,
+DMA_PQ,
DMA_DUAL_XOR,
DMA_PQ_UPDATE,
-DMA_ZERO_SUM,
-DMA_PQ_ZERO_SUM,
+DMA_XOR_VAL,
+DMA_PQ_VAL,
DMA_MEMSET,
DMA_MEMCPY_CRC32C,
DMA_INTERRUPT,
...@@ -74,14 +74,19 @@ enum dma_transaction_type {
* @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
* this transaction
* @DMA_CTRL_ACK - the descriptor cannot be reused until the client
-* acknowledges receipt, i.e. has a chance to establish any
-* dependency chains
+* acknowledges receipt, i.e. has a chance to establish any dependency
+* chains
* @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
* @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
* @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
* (if not set, do the source dma-unmapping as page)
* @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
* (if not set, do the destination dma-unmapping as page)
* @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
* @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
* @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
* sources that were the result of a previous operation; in the case of a PQ
* operation it continues the calculation with new sources
*/
enum dma_ctrl_flags {
DMA_PREP_INTERRUPT = (1 << 0),
...@@ -90,8 +95,30 @@ enum dma_ctrl_flags {
DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
DMA_PREP_PQ_DISABLE_P = (1 << 6),
DMA_PREP_PQ_DISABLE_Q = (1 << 7),
DMA_PREP_CONTINUE = (1 << 8),
};
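These flags are interpreted by the driver at prepare time. A caller that wants only the Q result of a continued computation combines them roughly as below (hedged sketch: 'chan', 'pq', 'srcs', 'scf', 'src_cnt' and 'len' are assumed to be already mapped and initialized; device_prep_dma_pq is the new callback added further down in this diff):

/* sketch: ask an engine for a Q-only pass that continues a previous P/Q */
unsigned long flags = DMA_PREP_INTERRUPT |
                      DMA_PREP_PQ_DISABLE_P |   /* do not regenerate P */
                      DMA_PREP_CONTINUE;        /* sources include the prior P/Q */
struct dma_async_tx_descriptor *tx;

tx = chan->device->device_prep_dma_pq(chan, pq, srcs, src_cnt, scf,
                                       len, flags);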
/**
* enum sum_check_bits - bit position of sum_check_flags
*/
enum sum_check_bits {
SUM_CHECK_P = 0,
SUM_CHECK_Q = 1,
};
/**
* enum sum_check_flags - result of async_{xor,syndrome}_val operations
* @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
* @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
*/
enum sum_check_flags {
SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
};
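The validation routines report their result through these bits rather than the old bare u32. A hedged sketch of consuming them after a syndrome check (names such as 'blocks', 'disks', 'spare_page', 'scribble' and STRIPE_SIZE are placeholders, not from the patch):

/* sketch: check P/Q for a stripe and report which result failed */
enum sum_check_flags pqres = 0;
struct async_submit_ctl submit;
struct dma_async_tx_descriptor *tx;

init_async_submit(&submit, 0, NULL, NULL, NULL, scribble);
tx = async_syndrome_val(blocks, 0, disks, STRIPE_SIZE, &pqres,
                        spare_page, &submit);
async_tx_quiesce(&tx);          /* wait for the check to complete */

if (pqres & SUM_CHECK_P_RESULT)
        pr_debug("P (xor parity) mismatch\n");
if (pqres & SUM_CHECK_Q_RESULT)
        pr_debug("Q (reed-solomon syndrome) mismatch\n");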
/**
* dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
* See linux/cpumask.h
...@@ -213,6 +240,7 @@ struct dma_async_tx_descriptor {
* @global_node: list_head for global dma_device_list
* @cap_mask: one or more dma_capability flags
* @max_xor: maximum number of xor sources, 0 if no capability
* @max_pq: maximum number of PQ sources and PQ-continue capability
* @dev_id: unique device ID
* @dev: struct device reference for dma mapping api
* @device_alloc_chan_resources: allocate resources and return the
...@@ -220,7 +248,9 @@ struct dma_async_tx_descriptor {
* @device_free_chan_resources: release DMA channel's resources
* @device_prep_dma_memcpy: prepares a memcpy operation
* @device_prep_dma_xor: prepares a xor operation
-* @device_prep_dma_zero_sum: prepares a zero_sum operation
+* @device_prep_dma_xor_val: prepares a xor validation operation
* @device_prep_dma_pq: prepares a pq operation
* @device_prep_dma_pq_val: prepares a pq validation (zero-sum) operation
* @device_prep_dma_memset: prepares a memset operation
* @device_prep_dma_interrupt: prepares an end of chain interrupt operation
* @device_prep_slave_sg: prepares a slave dma operation
...@@ -235,7 +265,9 @@ struct dma_device {
struct list_head channels;
struct list_head global_node;
dma_cap_mask_t cap_mask;
-int max_xor;
+unsigned short max_xor;
unsigned short max_pq;
#define DMA_HAS_PQ_CONTINUE (1 << 15)
int dev_id;
struct device *dev;
...@@ -249,9 +281,17 @@ struct dma_device {
struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
unsigned int src_cnt, size_t len, unsigned long flags);
-struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
+struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
-size_t len, u32 *result, unsigned long flags);
+size_t len, enum sum_check_flags *result, unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
unsigned int src_cnt, const unsigned char *scf,
size_t len, unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
unsigned int src_cnt, const unsigned char *scf, size_t len,
enum sum_check_flags *pqres, unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
unsigned long flags);
...@@ -270,6 +310,60 @@ struct dma_device {
void (*device_issue_pending)(struct dma_chan *chan);
};
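On the provider side, a driver exporting these capabilities fills in the new callbacks and limits when it registers its dma_device. A hedged sketch for a hypothetical driver (the 'foo_*' names and the 'dma' pointer are invented for illustration):

/* sketch: advertise PQ/validation support from a hypothetical driver */
struct dma_device *dma = &foo_dev->dma_dev;     /* hypothetical driver struct */

dma_cap_set(DMA_PQ, dma->cap_mask);
dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
dma->device_prep_dma_pq = foo_prep_pq;
dma->device_prep_dma_pq_val = foo_prep_pq_val;
dma->device_prep_dma_xor_val = foo_prep_xor_val;
dma_set_maxpq(dma, 8, 0);       /* 8 PQ sources, no native continuation */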
static inline void
dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
{
dma->max_pq = maxpq;
if (has_pq_continue)
dma->max_pq |= DMA_HAS_PQ_CONTINUE;
}
static inline bool dmaf_continue(enum dma_ctrl_flags flags)
{
return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
}
static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
{
enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
return (flags & mask) == mask;
}
static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
{
return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
}
static unsigned short dma_dev_to_maxpq(struct dma_device *dma)
{
return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
}
/* dma_maxpq - reduce maxpq in the face of continued operations
* @dma - dma device with PQ capability
* @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
*
* When an engine does not support native continuation we need 3 extra
* source slots to reuse P and Q with the following coefficients:
* 1/ {00} * P : remove P from Q', but use it as a source for P'
* 2/ {01} * Q : use Q to continue Q' calculation
* 3/ {00} * Q : subtract Q from P' to cancel (2)
*
* In the case where P is disabled we only need 1 extra source:
* 1/ {01} * Q : use Q to continue Q' calculation
*/
static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
{
if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
return dma_dev_to_maxpq(dma);
else if (dmaf_p_disabled_continue(flags))
return dma_dev_to_maxpq(dma) - 1;
else if (dmaf_continue(flags))
return dma_dev_to_maxpq(dma) - 3;
BUG();
}
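With a device like the one registered in the earlier sketch (max_pq = 8, no native continuation bit), dma_maxpq() shrinks the usable source count whenever a submission reuses a prior P/Q. A quick worked sketch ('dma' and 'n' are the illustrative names used above):

/* sketch: effective PQ source limits for an 8-source, non-continuing engine */
int n;

n = dma_maxpq(dma, 0);                          /* 8: full source count */
n = dma_maxpq(dma, DMA_PREP_CONTINUE);          /* 5: three slots reload P and Q */
n = dma_maxpq(dma, DMA_PREP_CONTINUE |
                   DMA_PREP_PQ_DISABLE_P);      /* 7: only Q must be reloaded */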
/* --- public DMA engine API --- */
#ifdef CONFIG_DMA_ENGINE