Commit 6460e75a authored by Douglas Gilbert, committed by James Bottomley

[SCSI] sg: fixes for large page_size

This sg driver patch addresses the problem with larger
page sizes reported by Brian King in this post:
http://marc.theaimsgroup.com/?l=linux-scsi&m=115867718623631&w=2
Some other related matters are also addressed. Some of these
prevent oopses when the SG_SCATTER_SZ or scatter_elem_sz are
set to inappropriate values.

The scatter_elem_sz has been tested up to 4 MB which should
make the largest data transfer with one SCSI command, 32 MB
less one block, achievable with a relatively small number
of elements in the scatter gather list.

ChangeLog:
    - add scatter_elem_sz boot time parameter and sysfs module
      parameter that is initialized to SG_SCATTER_SZ
    - the driver will then adjust scatter_elem_sz to be the
      max(given(scatter_elem_sz), PAGE_SIZE).
      It will also round it up, if necessary, to be a power
      of two
    - clean up sg.h header, correct bad urls and some statements
      that are no longer valid
    - make the def_reserved_size sysfs module attribute writable
Signed-off-by: Douglas Gilbert <dougg@torque.net>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
parent 8aee918a
...@@ -60,7 +60,7 @@ static int sg_version_num = 30534; /* 2 digits for each component */ ...@@ -60,7 +60,7 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#ifdef CONFIG_SCSI_PROC_FS #ifdef CONFIG_SCSI_PROC_FS
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
static char *sg_version_date = "20060818"; static char *sg_version_date = "20060920";
static int sg_proc_init(void); static int sg_proc_init(void);
static void sg_proc_cleanup(void); static void sg_proc_cleanup(void);
...@@ -94,6 +94,9 @@ int sg_big_buff = SG_DEF_RESERVED_SIZE; ...@@ -94,6 +94,9 @@ int sg_big_buff = SG_DEF_RESERVED_SIZE;
static int def_reserved_size = -1; /* picks up init parameter */ static int def_reserved_size = -1; /* picks up init parameter */
static int sg_allow_dio = SG_ALLOW_DIO_DEF; static int sg_allow_dio = SG_ALLOW_DIO_DEF;
static int scatter_elem_sz = SG_SCATTER_SZ;
static int scatter_elem_sz_prev = SG_SCATTER_SZ;
#define SG_SECTOR_SZ 512 #define SG_SECTOR_SZ 512
#define SG_SECTOR_MSK (SG_SECTOR_SZ - 1) #define SG_SECTOR_MSK (SG_SECTOR_SZ - 1)
...@@ -1537,11 +1540,9 @@ sg_remove(struct class_device *cl_dev, struct class_interface *cl_intf) ...@@ -1537,11 +1540,9 @@ sg_remove(struct class_device *cl_dev, struct class_interface *cl_intf)
msleep(10); /* dirty detach so delay device destruction */ msleep(10); /* dirty detach so delay device destruction */
} }
/* Set 'perm' (4th argument) to 0 to disable module_param's definition module_param_named(scatter_elem_sz, scatter_elem_sz, int, S_IRUGO | S_IWUSR);
* of sysfs parameters (which module_param doesn't yet support). module_param_named(def_reserved_size, def_reserved_size, int,
* Sysfs parameters defined explicitly below. S_IRUGO | S_IWUSR);
*/
module_param_named(def_reserved_size, def_reserved_size, int, S_IRUGO);
module_param_named(allow_dio, sg_allow_dio, int, S_IRUGO | S_IWUSR); module_param_named(allow_dio, sg_allow_dio, int, S_IRUGO | S_IWUSR);
MODULE_AUTHOR("Douglas Gilbert"); MODULE_AUTHOR("Douglas Gilbert");
...@@ -1550,6 +1551,8 @@ MODULE_LICENSE("GPL"); ...@@ -1550,6 +1551,8 @@ MODULE_LICENSE("GPL");
MODULE_VERSION(SG_VERSION_STR); MODULE_VERSION(SG_VERSION_STR);
MODULE_ALIAS_CHARDEV_MAJOR(SCSI_GENERIC_MAJOR); MODULE_ALIAS_CHARDEV_MAJOR(SCSI_GENERIC_MAJOR);
MODULE_PARM_DESC(scatter_elem_sz, "scatter gather element "
"size (default: max(SG_SCATTER_SZ, PAGE_SIZE))");
MODULE_PARM_DESC(def_reserved_size, "size of buffer reserved for each fd"); MODULE_PARM_DESC(def_reserved_size, "size of buffer reserved for each fd");
MODULE_PARM_DESC(allow_dio, "allow direct I/O (default: 0 (disallow))"); MODULE_PARM_DESC(allow_dio, "allow direct I/O (default: 0 (disallow))");
...@@ -1558,8 +1561,14 @@ init_sg(void) ...@@ -1558,8 +1561,14 @@ init_sg(void)
{ {
int rc; int rc;
if (scatter_elem_sz < PAGE_SIZE) {
scatter_elem_sz = PAGE_SIZE;
scatter_elem_sz_prev = scatter_elem_sz;
}
if (def_reserved_size >= 0) if (def_reserved_size >= 0)
sg_big_buff = def_reserved_size; sg_big_buff = def_reserved_size;
else
def_reserved_size = sg_big_buff;
rc = register_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), rc = register_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0),
SG_MAX_DEVS, "sg"); SG_MAX_DEVS, "sg");
...@@ -1842,15 +1851,30 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) ...@@ -1842,15 +1851,30 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
if (mx_sc_elems < 0) if (mx_sc_elems < 0)
return mx_sc_elems; /* most likely -ENOMEM */ return mx_sc_elems; /* most likely -ENOMEM */
num = scatter_elem_sz;
if (unlikely(num != scatter_elem_sz_prev)) {
if (num < PAGE_SIZE) {
scatter_elem_sz = PAGE_SIZE;
scatter_elem_sz_prev = PAGE_SIZE;
} else
scatter_elem_sz_prev = num;
}
for (k = 0, sg = schp->buffer, rem_sz = blk_size; for (k = 0, sg = schp->buffer, rem_sz = blk_size;
(rem_sz > 0) && (k < mx_sc_elems); (rem_sz > 0) && (k < mx_sc_elems);
++k, rem_sz -= ret_sz, ++sg) { ++k, rem_sz -= ret_sz, ++sg) {
num = (rem_sz > SG_SCATTER_SZ) ? SG_SCATTER_SZ : rem_sz; num = (rem_sz > scatter_elem_sz_prev) ?
scatter_elem_sz_prev : rem_sz;
p = sg_page_malloc(num, sfp->low_dma, &ret_sz); p = sg_page_malloc(num, sfp->low_dma, &ret_sz);
if (!p) if (!p)
return -ENOMEM; return -ENOMEM;
if (num == scatter_elem_sz_prev) {
if (unlikely(ret_sz > scatter_elem_sz_prev)) {
scatter_elem_sz = ret_sz;
scatter_elem_sz_prev = ret_sz;
}
}
sg->page = p; sg->page = p;
sg->length = ret_sz; sg->length = ret_sz;
...@@ -2341,6 +2365,9 @@ sg_add_sfp(Sg_device * sdp, int dev) ...@@ -2341,6 +2365,9 @@ sg_add_sfp(Sg_device * sdp, int dev)
} }
write_unlock_irqrestore(&sg_dev_arr_lock, iflags); write_unlock_irqrestore(&sg_dev_arr_lock, iflags);
SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: sfp=0x%p\n", sfp)); SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: sfp=0x%p\n", sfp));
if (unlikely(sg_big_buff != def_reserved_size))
sg_big_buff = def_reserved_size;
sg_build_reserve(sfp, sg_big_buff); sg_build_reserve(sfp, sg_big_buff);
SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: bufflen=%d, k_use_sg=%d\n", SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: bufflen=%d, k_use_sg=%d\n",
sfp->reserve.bufflen, sfp->reserve.k_use_sg)); sfp->reserve.bufflen, sfp->reserve.k_use_sg));
...@@ -2437,16 +2464,16 @@ sg_res_in_use(Sg_fd * sfp) ...@@ -2437,16 +2464,16 @@ sg_res_in_use(Sg_fd * sfp)
return srp ? 1 : 0; return srp ? 1 : 0;
} }
/* If retSzp==NULL want exact size or fail */ /* The size fetched (value output via retSzp) set when non-NULL return */
static struct page * static struct page *
sg_page_malloc(int rqSz, int lowDma, int *retSzp) sg_page_malloc(int rqSz, int lowDma, int *retSzp)
{ {
struct page *resp = NULL; struct page *resp = NULL;
gfp_t page_mask; gfp_t page_mask;
int order, a_size; int order, a_size;
int resSz = rqSz; int resSz;
if (rqSz <= 0) if ((rqSz <= 0) || (NULL == retSzp))
return resp; return resp;
if (lowDma) if (lowDma)
...@@ -2456,8 +2483,9 @@ sg_page_malloc(int rqSz, int lowDma, int *retSzp) ...@@ -2456,8 +2483,9 @@ sg_page_malloc(int rqSz, int lowDma, int *retSzp)
for (order = 0, a_size = PAGE_SIZE; a_size < rqSz; for (order = 0, a_size = PAGE_SIZE; a_size < rqSz;
order++, a_size <<= 1) ; order++, a_size <<= 1) ;
resSz = a_size; /* rounded up if necessary */
resp = alloc_pages(page_mask, order); resp = alloc_pages(page_mask, order);
while ((!resp) && order && retSzp) { while ((!resp) && order) {
--order; --order;
a_size >>= 1; /* divide by 2, until PAGE_SIZE */ a_size >>= 1; /* divide by 2, until PAGE_SIZE */
resp = alloc_pages(page_mask, order); /* try half */ resp = alloc_pages(page_mask, order); /* try half */
...@@ -2466,8 +2494,7 @@ sg_page_malloc(int rqSz, int lowDma, int *retSzp) ...@@ -2466,8 +2494,7 @@ sg_page_malloc(int rqSz, int lowDma, int *retSzp)
if (resp) { if (resp) {
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
memset(page_address(resp), 0, resSz); memset(page_address(resp), 0, resSz);
if (retSzp) *retSzp = resSz;
*retSzp = resSz;
} }
return resp; return resp;
} }
......
...@@ -11,26 +11,10 @@ ...@@ -11,26 +11,10 @@
Original driver (sg.h): Original driver (sg.h):
* Copyright (C) 1992 Lawrence Foard * Copyright (C) 1992 Lawrence Foard
Version 2 and 3 extensions to driver: Version 2 and 3 extensions to driver:
* Copyright (C) 1998 - 2003 Douglas Gilbert * Copyright (C) 1998 - 2006 Douglas Gilbert
Version: 3.5.29 (20030529) Version: 3.5.34 (20060920)
This version is for 2.5 series kernels. This version is for 2.6 series kernels.
Changes since 3.5.28 (20030308)
- fix bug introduced in version 3.1.24 (last segment of sgat list)
Changes since 3.5.27 (20020812)
- remove procfs entries: hosts, host_hdr + host_strs (now in sysfs)
- add sysfs sg driver params: def_reserved_size, allow_dio, version
- new boot option: "sg_allow_dio" and module parameter: "allow_dio"
- multiple internal changes due to scsi subsystem rework
Changes since 3.5.26 (20020708)
- re-add direct IO using Kai Makisara's work
- re-tab to 8, start using C99-isms
- simplify memory management
Changes since 3.5.25 (20020504)
- driverfs additions
- copy_to/from_user() fixes [William Stinson]
- disable kiobufs support
For a full changelog see http://www.torque.net/sg For a full changelog see http://www.torque.net/sg
...@@ -40,7 +24,7 @@ Map of SG verions to the Linux kernels in which they appear: ...@@ -40,7 +24,7 @@ Map of SG verions to the Linux kernels in which they appear:
2.1.40 2.2.20 2.1.40 2.2.20
3.0.x optional version 3 sg driver for 2.2 series 3.0.x optional version 3 sg driver for 2.2 series
3.1.17++ 2.4.0++ 3.1.17++ 2.4.0++
3.5.23++ 2.5.0++ 3.5.30++ 2.6.0++
Major new features in SG 3.x driver (cf SG 2.x drivers) Major new features in SG 3.x driver (cf SG 2.x drivers)
- SG_IO ioctl() combines function if write() and read() - SG_IO ioctl() combines function if write() and read()
...@@ -51,14 +35,15 @@ Major new features in SG 3.x driver (cf SG 2.x drivers) ...@@ -51,14 +35,15 @@ Major new features in SG 3.x driver (cf SG 2.x drivers)
data into kernel buffers and then use the CPU to copy the data into the data into kernel buffers and then use the CPU to copy the data into the
user space (vice versa for writes). That is called "indirect" IO due to user space (vice versa for writes). That is called "indirect" IO due to
the double handling of data. There are two methods offered to remove the the double handling of data. There are two methods offered to remove the
redundant copy: 1) direct IO which uses the kernel kiobuf mechanism and redundant copy: 1) direct IO and 2) using the mmap() system call to map
2) using the mmap() system call to map the reserve buffer (this driver has the reserve buffer (this driver has one reserve buffer per fd) into the
one reserve buffer per fd) into the user space. Both have their advantages. user space. Both have their advantages.
In terms of absolute speed mmap() is faster. If speed is not a concern, In terms of absolute speed mmap() is faster. If speed is not a concern,
indirect IO should be fine. Read the documentation for more information. indirect IO should be fine. Read the documentation for more information.
** N.B. To use direct IO 'echo 1 > /proc/scsi/sg/allow_dio' may be ** N.B. To use direct IO 'echo 1 > /proc/scsi/sg/allow_dio' or
needed. That pseudo file's content is defaulted to 0. ** 'echo 1 > /sys/module/sg/parameters/allow_dio' is needed.
That attribute is 0 by default. **
Historical note: this SCSI pass-through driver has been known as "sg" for Historical note: this SCSI pass-through driver has been known as "sg" for
a decade. In broader kernel discussions "sg" is used to refer to scatter a decade. In broader kernel discussions "sg" is used to refer to scatter
...@@ -72,20 +57,17 @@ Major new features in SG 3.x driver (cf SG 2.x drivers) ...@@ -72,20 +57,17 @@ Major new features in SG 3.x driver (cf SG 2.x drivers)
http://www.torque.net/sg/p/sg_v3_ho.html http://www.torque.net/sg/p/sg_v3_ho.html
This is a rendering from DocBook source [change the extension to "sgml" This is a rendering from DocBook source [change the extension to "sgml"
or "xml"]. There are renderings in "ps", "pdf", "rtf" and "txt" (soon). or "xml"]. There are renderings in "ps", "pdf", "rtf" and "txt" (soon).
The SG_IO ioctl is now found in other parts kernel (e.g. the block layer).
For more information see http://www.torque.net/sg/sg_io.html
The older, version 2 documents discuss the original sg interface in detail: The older, version 2 documents discuss the original sg interface in detail:
http://www.torque.net/sg/p/scsi-generic.txt http://www.torque.net/sg/p/scsi-generic.txt
http://www.torque.net/sg/p/scsi-generic_long.txt http://www.torque.net/sg/p/scsi-generic_long.txt
A version of this document (potentially out of date) may also be found in Also available: <kernel_source>/Documentation/scsi/scsi-generic.txt
the kernel source tree, probably at:
Documentation/scsi/scsi-generic.txt .
Utility and test programs are available at the sg web site. They are Utility and test programs are available at the sg web site. They are
bundled as sg_utils (for the lk 2.2 series) and sg3_utils (for the packaged as sg3_utils (for the lk 2.4 and 2.6 series) and sg_utils
lk 2.4 series). (for the lk 2.2 series).
There is a HOWTO on the Linux SCSI subsystem in the lk 2.4 series at:
http://www.linuxdoc.org/HOWTO/SCSI-2.4-HOWTO
*/ */
...@@ -238,13 +220,12 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ ...@@ -238,13 +220,12 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
#define SG_GET_ACCESS_COUNT 0x2289 #define SG_GET_ACCESS_COUNT 0x2289
#define SG_SCATTER_SZ (8 * 4096) /* PAGE_SIZE not available to user */ #define SG_SCATTER_SZ (8 * 4096)
/* Largest size (in bytes) a single scatter-gather list element can have. /* Largest size (in bytes) a single scatter-gather list element can have.
The value must be a power of 2 and <= (PAGE_SIZE * 32) [131072 bytes on The value used by the driver is 'max(SG_SCATTER_SZ, PAGE_SIZE)'.
i386]. The minimum value is PAGE_SIZE. If scatter-gather not supported This value should be a power of 2 (and may be rounded up internally).
by adapter then this value is the largest data block that can be If scatter-gather is not supported by adapter then this value is the
read/written by a single scsi command. The user can find the value of largest data block that can be read/written by a single scsi command. */
PAGE_SIZE by calling getpagesize() defined in unistd.h . */
#define SG_DEFAULT_RETRIES 0 #define SG_DEFAULT_RETRIES 0
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment