Commit e5b02087 authored by Linus Torvalds

Merge tag '5.19-rc-ksmbd-server-fixes' of git://git.samba.org/ksmbd

Pull ksmbd server updates from Steve French:

 - rdma (smbdirect) fixes, cleanup and optimizations

 - crediting (flow control) fix for mounts from Windows client

 - ACL fix

 - Windows client query dir fix

 - write validation fix

 - cleanups

* tag '5.19-rc-ksmbd-server-fixes' of git://git.samba.org/ksmbd:
  ksmbd: smbd: relax the count of sges required
  ksmbd: fix outstanding credits related bugs
  ksmbd: smbd: fix connection dropped issue
  ksmbd: Fix some kernel-doc comments
  ksmbd: fix wrong smbd max read/write size check
  ksmbd: add smbd max io size parameter
  ksmbd: handle smb2 query dir request for OutputBufferLength that is too small
  ksmbd: smbd: handle multiple Buffer descriptors
  ksmbd: smbd: change the return value of get_sg_list
  ksmbd: smbd: simplify tracking pending packets
  ksmbd: smbd: introduce read/write credits for RDMA read/write
  ksmbd: smbd: change prototypes of RDMA read/write related functions
  ksmbd: validate length in smb2_write()
  ksmbd: fix reference count leak in smb_check_perm_dacl()
parents 17eabd42 621433b7
...@@ -62,7 +62,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) ...@@ -62,7 +62,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
atomic_set(&conn->req_running, 0); atomic_set(&conn->req_running, 0);
atomic_set(&conn->r_count, 0); atomic_set(&conn->r_count, 0);
conn->total_credits = 1; conn->total_credits = 1;
conn->outstanding_credits = 1; conn->outstanding_credits = 0;
init_waitqueue_head(&conn->req_running_q); init_waitqueue_head(&conn->req_running_q);
INIT_LIST_HEAD(&conn->conns_list); INIT_LIST_HEAD(&conn->conns_list);
...@@ -205,31 +205,31 @@ int ksmbd_conn_write(struct ksmbd_work *work) ...@@ -205,31 +205,31 @@ int ksmbd_conn_write(struct ksmbd_work *work)
return 0; return 0;
} }
int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf, int ksmbd_conn_rdma_read(struct ksmbd_conn *conn,
unsigned int buflen, u32 remote_key, u64 remote_offset, void *buf, unsigned int buflen,
u32 remote_len) struct smb2_buffer_desc_v1 *desc,
unsigned int desc_len)
{ {
int ret = -EINVAL; int ret = -EINVAL;
if (conn->transport->ops->rdma_read) if (conn->transport->ops->rdma_read)
ret = conn->transport->ops->rdma_read(conn->transport, ret = conn->transport->ops->rdma_read(conn->transport,
buf, buflen, buf, buflen,
remote_key, remote_offset, desc, desc_len);
remote_len);
return ret; return ret;
} }
int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf, int ksmbd_conn_rdma_write(struct ksmbd_conn *conn,
unsigned int buflen, u32 remote_key, void *buf, unsigned int buflen,
u64 remote_offset, u32 remote_len) struct smb2_buffer_desc_v1 *desc,
unsigned int desc_len)
{ {
int ret = -EINVAL; int ret = -EINVAL;
if (conn->transport->ops->rdma_write) if (conn->transport->ops->rdma_write)
ret = conn->transport->ops->rdma_write(conn->transport, ret = conn->transport->ops->rdma_write(conn->transport,
buf, buflen, buf, buflen,
remote_key, remote_offset, desc, desc_len);
remote_len);
return ret; return ret;
} }
......
...@@ -122,11 +122,14 @@ struct ksmbd_transport_ops { ...@@ -122,11 +122,14 @@ struct ksmbd_transport_ops {
int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov, int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov,
int size, bool need_invalidate_rkey, int size, bool need_invalidate_rkey,
unsigned int remote_key); unsigned int remote_key);
int (*rdma_read)(struct ksmbd_transport *t, void *buf, unsigned int len, int (*rdma_read)(struct ksmbd_transport *t,
u32 remote_key, u64 remote_offset, u32 remote_len); void *buf, unsigned int len,
int (*rdma_write)(struct ksmbd_transport *t, void *buf, struct smb2_buffer_desc_v1 *desc,
unsigned int len, u32 remote_key, u64 remote_offset, unsigned int desc_len);
u32 remote_len); int (*rdma_write)(struct ksmbd_transport *t,
void *buf, unsigned int len,
struct smb2_buffer_desc_v1 *desc,
unsigned int desc_len);
}; };
struct ksmbd_transport { struct ksmbd_transport {
...@@ -148,12 +151,14 @@ struct ksmbd_conn *ksmbd_conn_alloc(void); ...@@ -148,12 +151,14 @@ struct ksmbd_conn *ksmbd_conn_alloc(void);
void ksmbd_conn_free(struct ksmbd_conn *conn); void ksmbd_conn_free(struct ksmbd_conn *conn);
bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c); bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c);
int ksmbd_conn_write(struct ksmbd_work *work); int ksmbd_conn_write(struct ksmbd_work *work);
int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf, int ksmbd_conn_rdma_read(struct ksmbd_conn *conn,
unsigned int buflen, u32 remote_key, u64 remote_offset, void *buf, unsigned int buflen,
u32 remote_len); struct smb2_buffer_desc_v1 *desc,
int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf, unsigned int desc_len);
unsigned int buflen, u32 remote_key, u64 remote_offset, int ksmbd_conn_rdma_write(struct ksmbd_conn *conn,
u32 remote_len); void *buf, unsigned int buflen,
struct smb2_buffer_desc_v1 *desc,
unsigned int desc_len);
void ksmbd_conn_enqueue_request(struct ksmbd_work *work); void ksmbd_conn_enqueue_request(struct ksmbd_work *work);
int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work); int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work);
void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops); void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops);
......
...@@ -104,7 +104,8 @@ struct ksmbd_startup_request { ...@@ -104,7 +104,8 @@ struct ksmbd_startup_request {
*/ */
__u32 sub_auth[3]; /* Subauth value for Security ID */ __u32 sub_auth[3]; /* Subauth value for Security ID */
__u32 smb2_max_credits; /* MAX credits */ __u32 smb2_max_credits; /* MAX credits */
__u32 reserved[128]; /* Reserved room */ __u32 smbd_max_io_size; /* smbd read write size */
__u32 reserved[127]; /* Reserved room */
__u32 ifc_list_sz; /* interfaces list size */ __u32 ifc_list_sz; /* interfaces list size */
__s8 ____payload[]; __s8 ____payload[];
}; };
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* wildcard '*' and '?' * wildcard '*' and '?'
* TODO : implement consideration about DOS_DOT, DOS_QM and DOS_STAR * TODO : implement consideration about DOS_DOT, DOS_QM and DOS_STAR
* *
* @string: string to compare with a pattern * @str: string to compare with a pattern
* @len: string length * @len: string length
* @pattern: pattern string which might include wildcard '*' and '?' * @pattern: pattern string which might include wildcard '*' and '?'
* *
...@@ -152,8 +152,8 @@ int parse_stream_name(char *filename, char **stream_name, int *s_type) ...@@ -152,8 +152,8 @@ int parse_stream_name(char *filename, char **stream_name, int *s_type)
/** /**
* convert_to_nt_pathname() - extract and return windows path string * convert_to_nt_pathname() - extract and return windows path string
* whose share directory prefix was removed from file path * whose share directory prefix was removed from file path
* @filename : unix filename * @share: ksmbd_share_config pointer
* @sharepath: share path string * @path: path to report
* *
* Return : windows path string or error * Return : windows path string or error
*/ */
...@@ -250,8 +250,8 @@ char *ksmbd_extract_sharename(char *treename) ...@@ -250,8 +250,8 @@ char *ksmbd_extract_sharename(char *treename)
/** /**
* convert_to_unix_name() - convert windows name to unix format * convert_to_unix_name() - convert windows name to unix format
* @path: name to be converted * @share: ksmbd_share_config pointer
* @tid: tree id of mathing share * @name: file name that is relative to share
* *
* Return: converted name on success, otherwise NULL * Return: converted name on success, otherwise NULL
*/ */
......
...@@ -338,7 +338,7 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn, ...@@ -338,7 +338,7 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
ret = 1; ret = 1;
} }
if ((u64)conn->outstanding_credits + credit_charge > conn->vals->max_credits) { if ((u64)conn->outstanding_credits + credit_charge > conn->total_credits) {
ksmbd_debug(SMB, "Limits exceeding the maximum allowable outstanding requests, given : %u, pending : %u\n", ksmbd_debug(SMB, "Limits exceeding the maximum allowable outstanding requests, given : %u, pending : %u\n",
credit_charge, conn->outstanding_credits); credit_charge, conn->outstanding_credits);
ret = 1; ret = 1;
......
...@@ -3938,6 +3938,12 @@ int smb2_query_dir(struct ksmbd_work *work) ...@@ -3938,6 +3938,12 @@ int smb2_query_dir(struct ksmbd_work *work)
set_ctx_actor(&dir_fp->readdir_data.ctx, __query_dir); set_ctx_actor(&dir_fp->readdir_data.ctx, __query_dir);
rc = iterate_dir(dir_fp->filp, &dir_fp->readdir_data.ctx); rc = iterate_dir(dir_fp->filp, &dir_fp->readdir_data.ctx);
/*
* req->OutputBufferLength is too small to contain even one entry.
* In this case, it immediately returns OutputBufferLength 0 to client.
*/
if (!d_info.out_buf_len && !d_info.num_entry)
goto no_buf_len;
if (rc == 0) if (rc == 0)
restart_ctx(&dir_fp->readdir_data.ctx); restart_ctx(&dir_fp->readdir_data.ctx);
if (rc == -ENOSPC) if (rc == -ENOSPC)
...@@ -3964,9 +3970,11 @@ int smb2_query_dir(struct ksmbd_work *work) ...@@ -3964,9 +3970,11 @@ int smb2_query_dir(struct ksmbd_work *work)
rsp->Buffer[0] = 0; rsp->Buffer[0] = 0;
inc_rfc1001_len(work->response_buf, 9); inc_rfc1001_len(work->response_buf, 9);
} else { } else {
no_buf_len:
((struct file_directory_info *) ((struct file_directory_info *)
((char *)rsp->Buffer + d_info.last_entry_offset)) ((char *)rsp->Buffer + d_info.last_entry_offset))
->NextEntryOffset = 0; ->NextEntryOffset = 0;
if (d_info.data_count >= d_info.last_entry_off_align)
d_info.data_count -= d_info.last_entry_off_align; d_info.data_count -= d_info.last_entry_off_align;
rsp->StructureSize = cpu_to_le16(9); rsp->StructureSize = cpu_to_le16(9);
...@@ -6116,7 +6124,6 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work) ...@@ -6116,7 +6124,6 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work)
static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work, static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work,
struct smb2_buffer_desc_v1 *desc, struct smb2_buffer_desc_v1 *desc,
__le32 Channel, __le32 Channel,
__le16 ChannelInfoOffset,
__le16 ChannelInfoLength) __le16 ChannelInfoLength)
{ {
unsigned int i, ch_count; unsigned int i, ch_count;
...@@ -6134,14 +6141,12 @@ static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work, ...@@ -6134,14 +6141,12 @@ static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work,
le32_to_cpu(desc[i].length)); le32_to_cpu(desc[i].length));
} }
} }
if (ch_count != 1) { if (!ch_count)
ksmbd_debug(RDMA, "RDMA multiple buffer descriptors %d are not supported yet\n",
ch_count);
return -EINVAL; return -EINVAL;
}
work->need_invalidate_rkey = work->need_invalidate_rkey =
(Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE); (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
if (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE)
work->remote_key = le32_to_cpu(desc->token); work->remote_key = le32_to_cpu(desc->token);
return 0; return 0;
} }
...@@ -6150,14 +6155,12 @@ static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work, ...@@ -6150,14 +6155,12 @@ static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
struct smb2_read_req *req, void *data_buf, struct smb2_read_req *req, void *data_buf,
size_t length) size_t length)
{ {
struct smb2_buffer_desc_v1 *desc =
(struct smb2_buffer_desc_v1 *)&req->Buffer[0];
int err; int err;
err = ksmbd_conn_rdma_write(work->conn, data_buf, length, err = ksmbd_conn_rdma_write(work->conn, data_buf, length,
le32_to_cpu(desc->token), (struct smb2_buffer_desc_v1 *)
le64_to_cpu(desc->offset), ((char *)req + le16_to_cpu(req->ReadChannelInfoOffset)),
le32_to_cpu(desc->length)); le16_to_cpu(req->ReadChannelInfoLength));
if (err) if (err)
return err; return err;
...@@ -6180,6 +6183,8 @@ int smb2_read(struct ksmbd_work *work) ...@@ -6180,6 +6183,8 @@ int smb2_read(struct ksmbd_work *work)
size_t length, mincount; size_t length, mincount;
ssize_t nbytes = 0, remain_bytes = 0; ssize_t nbytes = 0, remain_bytes = 0;
int err = 0; int err = 0;
bool is_rdma_channel = false;
unsigned int max_read_size = conn->vals->max_read_size;
WORK_BUFFERS(work, req, rsp); WORK_BUFFERS(work, req, rsp);
...@@ -6191,6 +6196,11 @@ int smb2_read(struct ksmbd_work *work) ...@@ -6191,6 +6196,11 @@ int smb2_read(struct ksmbd_work *work)
if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE || if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
req->Channel == SMB2_CHANNEL_RDMA_V1) { req->Channel == SMB2_CHANNEL_RDMA_V1) {
is_rdma_channel = true;
max_read_size = get_smbd_max_read_write_size();
}
if (is_rdma_channel == true) {
unsigned int ch_offset = le16_to_cpu(req->ReadChannelInfoOffset); unsigned int ch_offset = le16_to_cpu(req->ReadChannelInfoOffset);
if (ch_offset < offsetof(struct smb2_read_req, Buffer)) { if (ch_offset < offsetof(struct smb2_read_req, Buffer)) {
...@@ -6201,7 +6211,6 @@ int smb2_read(struct ksmbd_work *work) ...@@ -6201,7 +6211,6 @@ int smb2_read(struct ksmbd_work *work)
(struct smb2_buffer_desc_v1 *) (struct smb2_buffer_desc_v1 *)
((char *)req + ch_offset), ((char *)req + ch_offset),
req->Channel, req->Channel,
req->ReadChannelInfoOffset,
req->ReadChannelInfoLength); req->ReadChannelInfoLength);
if (err) if (err)
goto out; goto out;
...@@ -6223,9 +6232,9 @@ int smb2_read(struct ksmbd_work *work) ...@@ -6223,9 +6232,9 @@ int smb2_read(struct ksmbd_work *work)
length = le32_to_cpu(req->Length); length = le32_to_cpu(req->Length);
mincount = le32_to_cpu(req->MinimumCount); mincount = le32_to_cpu(req->MinimumCount);
if (length > conn->vals->max_read_size) { if (length > max_read_size) {
ksmbd_debug(SMB, "limiting read size to max size(%u)\n", ksmbd_debug(SMB, "limiting read size to max size(%u)\n",
conn->vals->max_read_size); max_read_size);
err = -EINVAL; err = -EINVAL;
goto out; goto out;
} }
...@@ -6257,8 +6266,7 @@ int smb2_read(struct ksmbd_work *work) ...@@ -6257,8 +6266,7 @@ int smb2_read(struct ksmbd_work *work)
ksmbd_debug(SMB, "nbytes %zu, offset %lld mincount %zu\n", ksmbd_debug(SMB, "nbytes %zu, offset %lld mincount %zu\n",
nbytes, offset, mincount); nbytes, offset, mincount);
if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE || if (is_rdma_channel == true) {
req->Channel == SMB2_CHANNEL_RDMA_V1) {
/* write data to the client using rdma channel */ /* write data to the client using rdma channel */
remain_bytes = smb2_read_rdma_channel(work, req, remain_bytes = smb2_read_rdma_channel(work, req,
work->aux_payload_buf, work->aux_payload_buf,
...@@ -6328,10 +6336,6 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work) ...@@ -6328,10 +6336,6 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work)
length = le32_to_cpu(req->Length); length = le32_to_cpu(req->Length);
id = req->VolatileFileId; id = req->VolatileFileId;
if (le16_to_cpu(req->DataOffset) ==
offsetof(struct smb2_write_req, Buffer)) {
data_buf = (char *)&req->Buffer[0];
} else {
if ((u64)le16_to_cpu(req->DataOffset) + length > if ((u64)le16_to_cpu(req->DataOffset) + length >
get_rfc1002_len(work->request_buf)) { get_rfc1002_len(work->request_buf)) {
pr_err("invalid write data offset %u, smb_len %u\n", pr_err("invalid write data offset %u, smb_len %u\n",
...@@ -6343,7 +6347,6 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work) ...@@ -6343,7 +6347,6 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work)
data_buf = (char *)(((char *)&req->hdr.ProtocolId) + data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
le16_to_cpu(req->DataOffset)); le16_to_cpu(req->DataOffset));
}
rpc_resp = ksmbd_rpc_write(work->sess, id, data_buf, length); rpc_resp = ksmbd_rpc_write(work->sess, id, data_buf, length);
if (rpc_resp) { if (rpc_resp) {
...@@ -6384,21 +6387,18 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work, ...@@ -6384,21 +6387,18 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work,
struct ksmbd_file *fp, struct ksmbd_file *fp,
loff_t offset, size_t length, bool sync) loff_t offset, size_t length, bool sync)
{ {
struct smb2_buffer_desc_v1 *desc;
char *data_buf; char *data_buf;
int ret; int ret;
ssize_t nbytes; ssize_t nbytes;
desc = (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
data_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO); data_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
if (!data_buf) if (!data_buf)
return -ENOMEM; return -ENOMEM;
ret = ksmbd_conn_rdma_read(work->conn, data_buf, length, ret = ksmbd_conn_rdma_read(work->conn, data_buf, length,
le32_to_cpu(desc->token), (struct smb2_buffer_desc_v1 *)
le64_to_cpu(desc->offset), ((char *)req + le16_to_cpu(req->WriteChannelInfoOffset)),
le32_to_cpu(desc->length)); le16_to_cpu(req->WriteChannelInfoLength));
if (ret < 0) { if (ret < 0) {
kvfree(data_buf); kvfree(data_buf);
return ret; return ret;
...@@ -6427,8 +6427,9 @@ int smb2_write(struct ksmbd_work *work) ...@@ -6427,8 +6427,9 @@ int smb2_write(struct ksmbd_work *work)
size_t length; size_t length;
ssize_t nbytes; ssize_t nbytes;
char *data_buf; char *data_buf;
bool writethrough = false; bool writethrough = false, is_rdma_channel = false;
int err = 0; int err = 0;
unsigned int max_write_size = work->conn->vals->max_write_size;
WORK_BUFFERS(work, req, rsp); WORK_BUFFERS(work, req, rsp);
...@@ -6437,8 +6438,17 @@ int smb2_write(struct ksmbd_work *work) ...@@ -6437,8 +6438,17 @@ int smb2_write(struct ksmbd_work *work)
return smb2_write_pipe(work); return smb2_write_pipe(work);
} }
offset = le64_to_cpu(req->Offset);
length = le32_to_cpu(req->Length);
if (req->Channel == SMB2_CHANNEL_RDMA_V1 || if (req->Channel == SMB2_CHANNEL_RDMA_V1 ||
req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) { req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
is_rdma_channel = true;
max_write_size = get_smbd_max_read_write_size();
length = le32_to_cpu(req->RemainingBytes);
}
if (is_rdma_channel == true) {
unsigned int ch_offset = le16_to_cpu(req->WriteChannelInfoOffset); unsigned int ch_offset = le16_to_cpu(req->WriteChannelInfoOffset);
if (req->Length != 0 || req->DataOffset != 0 || if (req->Length != 0 || req->DataOffset != 0 ||
...@@ -6450,7 +6460,6 @@ int smb2_write(struct ksmbd_work *work) ...@@ -6450,7 +6460,6 @@ int smb2_write(struct ksmbd_work *work)
(struct smb2_buffer_desc_v1 *) (struct smb2_buffer_desc_v1 *)
((char *)req + ch_offset), ((char *)req + ch_offset),
req->Channel, req->Channel,
req->WriteChannelInfoOffset,
req->WriteChannelInfoLength); req->WriteChannelInfoLength);
if (err) if (err)
goto out; goto out;
...@@ -6474,12 +6483,9 @@ int smb2_write(struct ksmbd_work *work) ...@@ -6474,12 +6483,9 @@ int smb2_write(struct ksmbd_work *work)
goto out; goto out;
} }
offset = le64_to_cpu(req->Offset); if (length > max_write_size) {
length = le32_to_cpu(req->Length);
if (length > work->conn->vals->max_write_size) {
ksmbd_debug(SMB, "limiting write size to max size(%u)\n", ksmbd_debug(SMB, "limiting write size to max size(%u)\n",
work->conn->vals->max_write_size); max_write_size);
err = -EINVAL; err = -EINVAL;
goto out; goto out;
} }
...@@ -6487,12 +6493,7 @@ int smb2_write(struct ksmbd_work *work) ...@@ -6487,12 +6493,7 @@ int smb2_write(struct ksmbd_work *work)
if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH) if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH)
writethrough = true; writethrough = true;
if (req->Channel != SMB2_CHANNEL_RDMA_V1 && if (is_rdma_channel == false) {
req->Channel != SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
if (le16_to_cpu(req->DataOffset) ==
offsetof(struct smb2_write_req, Buffer)) {
data_buf = (char *)&req->Buffer[0];
} else {
if ((u64)le16_to_cpu(req->DataOffset) + length > if ((u64)le16_to_cpu(req->DataOffset) + length >
get_rfc1002_len(work->request_buf)) { get_rfc1002_len(work->request_buf)) {
pr_err("invalid write data offset %u, smb_len %u\n", pr_err("invalid write data offset %u, smb_len %u\n",
...@@ -6501,10 +6502,8 @@ int smb2_write(struct ksmbd_work *work) ...@@ -6501,10 +6502,8 @@ int smb2_write(struct ksmbd_work *work)
err = -EINVAL; err = -EINVAL;
goto out; goto out;
} }
data_buf = (char *)(((char *)&req->hdr.ProtocolId) + data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
le16_to_cpu(req->DataOffset)); le16_to_cpu(req->DataOffset));
}
ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags)); ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags));
if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH) if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH)
...@@ -6520,8 +6519,7 @@ int smb2_write(struct ksmbd_work *work) ...@@ -6520,8 +6519,7 @@ int smb2_write(struct ksmbd_work *work)
/* read data from the client using rdma channel, and /* read data from the client using rdma channel, and
* write the data. * write the data.
*/ */
nbytes = smb2_write_rdma_channel(work, req, fp, offset, nbytes = smb2_write_rdma_channel(work, req, fp, offset, length,
le32_to_cpu(req->RemainingBytes),
writethrough); writethrough);
if (nbytes < 0) { if (nbytes < 0) {
err = (int)nbytes; err = (int)nbytes;
......
...@@ -140,8 +140,10 @@ int ksmbd_verify_smb_message(struct ksmbd_work *work) ...@@ -140,8 +140,10 @@ int ksmbd_verify_smb_message(struct ksmbd_work *work)
hdr = work->request_buf; hdr = work->request_buf;
if (*(__le32 *)hdr->Protocol == SMB1_PROTO_NUMBER && if (*(__le32 *)hdr->Protocol == SMB1_PROTO_NUMBER &&
hdr->Command == SMB_COM_NEGOTIATE) hdr->Command == SMB_COM_NEGOTIATE) {
work->conn->outstanding_credits++;
return 0; return 0;
}
return -EINVAL; return -EINVAL;
} }
......
...@@ -1261,6 +1261,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, ...@@ -1261,6 +1261,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
if (!access_bits) if (!access_bits)
access_bits = access_bits =
SET_MINIMUM_RIGHTS; SET_MINIMUM_RIGHTS;
posix_acl_release(posix_acls);
goto check_access_bits; goto check_access_bits;
} }
} }
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "mgmt/ksmbd_ida.h" #include "mgmt/ksmbd_ida.h"
#include "connection.h" #include "connection.h"
#include "transport_tcp.h" #include "transport_tcp.h"
#include "transport_rdma.h"
#define IPC_WAIT_TIMEOUT (2 * HZ) #define IPC_WAIT_TIMEOUT (2 * HZ)
...@@ -303,6 +304,8 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req) ...@@ -303,6 +304,8 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
init_smb2_max_trans_size(req->smb2_max_trans); init_smb2_max_trans_size(req->smb2_max_trans);
if (req->smb2_max_credits) if (req->smb2_max_credits)
init_smb2_max_credits(req->smb2_max_credits); init_smb2_max_credits(req->smb2_max_credits);
if (req->smbd_max_io_size)
init_smbd_max_io_size(req->smbd_max_io_size);
ret = ksmbd_set_netbios_name(req->netbios_name); ret = ksmbd_set_netbios_name(req->netbios_name);
ret |= ksmbd_set_server_string(req->server_string); ret |= ksmbd_set_server_string(req->server_string);
......
...@@ -80,9 +80,7 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024; ...@@ -80,9 +80,7 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
/* The maximum single-message size which can be received */ /* The maximum single-message size which can be received */
static int smb_direct_max_receive_size = 8192; static int smb_direct_max_receive_size = 8192;
static int smb_direct_max_read_write_size = 524224; static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;
static int smb_direct_max_outstanding_rw_ops = 8;
static LIST_HEAD(smb_direct_device_list); static LIST_HEAD(smb_direct_device_list);
static DEFINE_RWLOCK(smb_direct_device_lock); static DEFINE_RWLOCK(smb_direct_device_lock);
...@@ -147,18 +145,18 @@ struct smb_direct_transport { ...@@ -147,18 +145,18 @@ struct smb_direct_transport {
atomic_t send_credits; atomic_t send_credits;
spinlock_t lock_new_recv_credits; spinlock_t lock_new_recv_credits;
int new_recv_credits; int new_recv_credits;
atomic_t rw_avail_ops; int max_rw_credits;
int pages_per_rw_credit;
atomic_t rw_credits;
wait_queue_head_t wait_send_credits; wait_queue_head_t wait_send_credits;
wait_queue_head_t wait_rw_avail_ops; wait_queue_head_t wait_rw_credits;
mempool_t *sendmsg_mempool; mempool_t *sendmsg_mempool;
struct kmem_cache *sendmsg_cache; struct kmem_cache *sendmsg_cache;
mempool_t *recvmsg_mempool; mempool_t *recvmsg_mempool;
struct kmem_cache *recvmsg_cache; struct kmem_cache *recvmsg_cache;
wait_queue_head_t wait_send_payload_pending;
atomic_t send_payload_pending;
wait_queue_head_t wait_send_pending; wait_queue_head_t wait_send_pending;
atomic_t send_pending; atomic_t send_pending;
...@@ -208,12 +206,25 @@ struct smb_direct_recvmsg { ...@@ -208,12 +206,25 @@ struct smb_direct_recvmsg {
struct smb_direct_rdma_rw_msg { struct smb_direct_rdma_rw_msg {
struct smb_direct_transport *t; struct smb_direct_transport *t;
struct ib_cqe cqe; struct ib_cqe cqe;
int status;
struct completion *completion; struct completion *completion;
struct list_head list;
struct rdma_rw_ctx rw_ctx; struct rdma_rw_ctx rw_ctx;
struct sg_table sgt; struct sg_table sgt;
struct scatterlist sg_list[]; struct scatterlist sg_list[];
}; };
void init_smbd_max_io_size(unsigned int sz)
{
sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE);
smb_direct_max_read_write_size = sz;
}
unsigned int get_smbd_max_read_write_size(void)
{
return smb_direct_max_read_write_size;
}
static inline int get_buf_page_count(void *buf, int size) static inline int get_buf_page_count(void *buf, int size)
{ {
return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
...@@ -377,7 +388,7 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) ...@@ -377,7 +388,7 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
t->reassembly_queue_length = 0; t->reassembly_queue_length = 0;
init_waitqueue_head(&t->wait_reassembly_queue); init_waitqueue_head(&t->wait_reassembly_queue);
init_waitqueue_head(&t->wait_send_credits); init_waitqueue_head(&t->wait_send_credits);
init_waitqueue_head(&t->wait_rw_avail_ops); init_waitqueue_head(&t->wait_rw_credits);
spin_lock_init(&t->receive_credit_lock); spin_lock_init(&t->receive_credit_lock);
spin_lock_init(&t->recvmsg_queue_lock); spin_lock_init(&t->recvmsg_queue_lock);
...@@ -386,8 +397,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) ...@@ -386,8 +397,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
spin_lock_init(&t->empty_recvmsg_queue_lock); spin_lock_init(&t->empty_recvmsg_queue_lock);
INIT_LIST_HEAD(&t->empty_recvmsg_queue); INIT_LIST_HEAD(&t->empty_recvmsg_queue);
init_waitqueue_head(&t->wait_send_payload_pending);
atomic_set(&t->send_payload_pending, 0);
init_waitqueue_head(&t->wait_send_pending); init_waitqueue_head(&t->wait_send_pending);
atomic_set(&t->send_pending, 0); atomic_set(&t->send_pending, 0);
...@@ -417,8 +426,6 @@ static void free_transport(struct smb_direct_transport *t) ...@@ -417,8 +426,6 @@ static void free_transport(struct smb_direct_transport *t)
wake_up_interruptible(&t->wait_send_credits); wake_up_interruptible(&t->wait_send_credits);
ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n"); ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
wait_event(t->wait_send_payload_pending,
atomic_read(&t->send_payload_pending) == 0);
wait_event(t->wait_send_pending, wait_event(t->wait_send_pending,
atomic_read(&t->send_pending) == 0); atomic_read(&t->send_pending) == 0);
...@@ -569,6 +576,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) ...@@ -569,6 +576,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
} }
t->negotiation_requested = true; t->negotiation_requested = true;
t->full_packet_received = true; t->full_packet_received = true;
t->status = SMB_DIRECT_CS_CONNECTED;
enqueue_reassembly(t, recvmsg, 0); enqueue_reassembly(t, recvmsg, 0);
wake_up_interruptible(&t->wait_status); wake_up_interruptible(&t->wait_status);
break; break;
...@@ -873,13 +881,8 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) ...@@ -873,13 +881,8 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc)
smb_direct_disconnect_rdma_connection(t); smb_direct_disconnect_rdma_connection(t);
} }
if (sendmsg->num_sge > 1) {
if (atomic_dec_and_test(&t->send_payload_pending))
wake_up(&t->wait_send_payload_pending);
} else {
if (atomic_dec_and_test(&t->send_pending)) if (atomic_dec_and_test(&t->send_pending))
wake_up(&t->wait_send_pending); wake_up(&t->wait_send_pending);
}
/* iterate and free the list of messages in reverse. the list's head /* iterate and free the list of messages in reverse. the list's head
* is invalid. * is invalid.
...@@ -911,21 +914,12 @@ static int smb_direct_post_send(struct smb_direct_transport *t, ...@@ -911,21 +914,12 @@ static int smb_direct_post_send(struct smb_direct_transport *t,
{ {
int ret; int ret;
if (wr->num_sge > 1)
atomic_inc(&t->send_payload_pending);
else
atomic_inc(&t->send_pending); atomic_inc(&t->send_pending);
ret = ib_post_send(t->qp, wr, NULL); ret = ib_post_send(t->qp, wr, NULL);
if (ret) { if (ret) {
pr_err("failed to post send: %d\n", ret); pr_err("failed to post send: %d\n", ret);
if (wr->num_sge > 1) {
if (atomic_dec_and_test(&t->send_payload_pending))
wake_up(&t->wait_send_payload_pending);
} else {
if (atomic_dec_and_test(&t->send_pending)) if (atomic_dec_and_test(&t->send_pending))
wake_up(&t->wait_send_pending); wake_up(&t->wait_send_pending);
}
smb_direct_disconnect_rdma_connection(t); smb_direct_disconnect_rdma_connection(t);
} }
return ret; return ret;
...@@ -983,17 +977,18 @@ static int smb_direct_flush_send_list(struct smb_direct_transport *t, ...@@ -983,17 +977,18 @@ static int smb_direct_flush_send_list(struct smb_direct_transport *t,
} }
static int wait_for_credits(struct smb_direct_transport *t, static int wait_for_credits(struct smb_direct_transport *t,
wait_queue_head_t *waitq, atomic_t *credits) wait_queue_head_t *waitq, atomic_t *total_credits,
int needed)
{ {
int ret; int ret;
do { do {
if (atomic_dec_return(credits) >= 0) if (atomic_sub_return(needed, total_credits) >= 0)
return 0; return 0;
atomic_inc(credits); atomic_add(needed, total_credits);
ret = wait_event_interruptible(*waitq, ret = wait_event_interruptible(*waitq,
atomic_read(credits) > 0 || atomic_read(total_credits) >= needed ||
t->status != SMB_DIRECT_CS_CONNECTED); t->status != SMB_DIRECT_CS_CONNECTED);
if (t->status != SMB_DIRECT_CS_CONNECTED) if (t->status != SMB_DIRECT_CS_CONNECTED)
...@@ -1015,7 +1010,19 @@ static int wait_for_send_credits(struct smb_direct_transport *t, ...@@ -1015,7 +1010,19 @@ static int wait_for_send_credits(struct smb_direct_transport *t,
return ret; return ret;
} }
return wait_for_credits(t, &t->wait_send_credits, &t->send_credits); return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1);
}
static int wait_for_rw_credits(struct smb_direct_transport *t, int credits)
{
return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits);
}
static int calc_rw_credits(struct smb_direct_transport *t,
char *buf, unsigned int len)
{
return DIV_ROUND_UP(get_buf_page_count(buf, len),
t->pages_per_rw_credit);
} }
static int smb_direct_create_header(struct smb_direct_transport *t, static int smb_direct_create_header(struct smb_direct_transport *t,
...@@ -1086,7 +1093,7 @@ static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nen ...@@ -1086,7 +1093,7 @@ static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nen
int offset, len; int offset, len;
int i = 0; int i = 0;
if (nentries < get_buf_page_count(buf, size)) if (size <= 0 || nentries < get_buf_page_count(buf, size))
return -EINVAL; return -EINVAL;
offset = offset_in_page(buf); offset = offset_in_page(buf);
...@@ -1118,7 +1125,7 @@ static int get_mapped_sg_list(struct ib_device *device, void *buf, int size, ...@@ -1118,7 +1125,7 @@ static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
int npages; int npages;
npages = get_sg_list(buf, size, sg_list, nentries); npages = get_sg_list(buf, size, sg_list, nentries);
if (npages <= 0) if (npages < 0)
return -EINVAL; return -EINVAL;
return ib_dma_map_sg(device, sg_list, npages, dir); return ib_dma_map_sg(device, sg_list, npages, dir);
} }
...@@ -1313,11 +1320,21 @@ static int smb_direct_writev(struct ksmbd_transport *t, ...@@ -1313,11 +1320,21 @@ static int smb_direct_writev(struct ksmbd_transport *t,
* that means all the I/Os have been out and we are good to return * that means all the I/Os have been out and we are good to return
*/ */
wait_event(st->wait_send_payload_pending, wait_event(st->wait_send_pending,
atomic_read(&st->send_payload_pending) == 0); atomic_read(&st->send_pending) == 0);
return ret; return ret;
} }
static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t,
struct smb_direct_rdma_rw_msg *msg,
enum dma_data_direction dir)
{
rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
msg->sgt.sgl, msg->sgt.nents, dir);
sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
kfree(msg);
}
static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
enum dma_data_direction dir) enum dma_data_direction dir)
{ {
...@@ -1326,19 +1343,14 @@ static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, ...@@ -1326,19 +1343,14 @@ static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
struct smb_direct_transport *t = msg->t; struct smb_direct_transport *t = msg->t;
if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_SUCCESS) {
msg->status = -EIO;
pr_err("read/write error. opcode = %d, status = %s(%d)\n", pr_err("read/write error. opcode = %d, status = %s(%d)\n",
wc->opcode, ib_wc_status_msg(wc->status), wc->status); wc->opcode, ib_wc_status_msg(wc->status), wc->status);
if (wc->status != IB_WC_WR_FLUSH_ERR)
smb_direct_disconnect_rdma_connection(t); smb_direct_disconnect_rdma_connection(t);
} }
if (atomic_inc_return(&t->rw_avail_ops) > 0)
wake_up(&t->wait_rw_avail_ops);
rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
msg->sg_list, msg->sgt.nents, dir);
sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
complete(msg->completion); complete(msg->completion);
kfree(msg);
} }
static void read_done(struct ib_cq *cq, struct ib_wc *wc) static void read_done(struct ib_cq *cq, struct ib_wc *wc)
...@@ -1351,94 +1363,141 @@ static void write_done(struct ib_cq *cq, struct ib_wc *wc) ...@@ -1351,94 +1363,141 @@ static void write_done(struct ib_cq *cq, struct ib_wc *wc)
read_write_done(cq, wc, DMA_TO_DEVICE); read_write_done(cq, wc, DMA_TO_DEVICE);
} }
static int smb_direct_rdma_xmit(struct smb_direct_transport *t, void *buf, static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
int buf_len, u32 remote_key, u64 remote_offset, void *buf, int buf_len,
u32 remote_len, bool is_read) struct smb2_buffer_desc_v1 *desc,
unsigned int desc_len,
bool is_read)
{ {
struct smb_direct_rdma_rw_msg *msg; struct smb_direct_rdma_rw_msg *msg, *next_msg;
int ret; int i, ret;
DECLARE_COMPLETION_ONSTACK(completion); DECLARE_COMPLETION_ONSTACK(completion);
struct ib_send_wr *first_wr = NULL; struct ib_send_wr *first_wr;
LIST_HEAD(msg_list);
char *desc_buf;
int credits_needed;
unsigned int desc_buf_len;
size_t total_length = 0;
if (t->status != SMB_DIRECT_CS_CONNECTED)
return -ENOTCONN;
/* calculate needed credits */
credits_needed = 0;
desc_buf = buf;
for (i = 0; i < desc_len / sizeof(*desc); i++) {
desc_buf_len = le32_to_cpu(desc[i].length);
credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len);
desc_buf += desc_buf_len;
total_length += desc_buf_len;
if (desc_buf_len == 0 || total_length > buf_len ||
total_length > t->max_rdma_rw_size)
return -EINVAL;
}
ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n",
is_read ? "read" : "write", buf_len, credits_needed);
ret = wait_for_credits(t, &t->wait_rw_avail_ops, &t->rw_avail_ops); ret = wait_for_rw_credits(t, credits_needed);
if (ret < 0) if (ret < 0)
return ret; return ret;
/* TODO: mempool */ /* build rdma_rw_ctx for each descriptor */
msg = kmalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) + desc_buf = buf;
for (i = 0; i < desc_len / sizeof(*desc); i++) {
msg = kzalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) +
sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL); sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL);
if (!msg) { if (!msg) {
atomic_inc(&t->rw_avail_ops); ret = -ENOMEM;
return -ENOMEM; goto out;
} }
desc_buf_len = le32_to_cpu(desc[i].length);
msg->t = t;
msg->cqe.done = is_read ? read_done : write_done;
msg->completion = &completion;
msg->sgt.sgl = &msg->sg_list[0]; msg->sgt.sgl = &msg->sg_list[0];
ret = sg_alloc_table_chained(&msg->sgt, ret = sg_alloc_table_chained(&msg->sgt,
get_buf_page_count(buf, buf_len), get_buf_page_count(desc_buf, desc_buf_len),
msg->sg_list, SG_CHUNK_SIZE); msg->sg_list, SG_CHUNK_SIZE);
if (ret) { if (ret) {
atomic_inc(&t->rw_avail_ops);
kfree(msg); kfree(msg);
return -ENOMEM; ret = -ENOMEM;
goto out;
} }
ret = get_sg_list(buf, buf_len, msg->sgt.sgl, msg->sgt.orig_nents); ret = get_sg_list(desc_buf, desc_buf_len,
if (ret <= 0) { msg->sgt.sgl, msg->sgt.orig_nents);
pr_err("failed to get pages\n"); if (ret < 0) {
goto err; sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
kfree(msg);
goto out;
} }
ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port, ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
msg->sg_list, get_buf_page_count(buf, buf_len), msg->sgt.sgl,
0, remote_offset, remote_key, get_buf_page_count(desc_buf, desc_buf_len),
0,
le64_to_cpu(desc[i].offset),
le32_to_cpu(desc[i].token),
is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
if (ret < 0) { if (ret < 0) {
pr_err("failed to init rdma_rw_ctx: %d\n", ret); pr_err("failed to init rdma_rw_ctx: %d\n", ret);
goto err; sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
kfree(msg);
goto out;
} }
msg->t = t; list_add_tail(&msg->list, &msg_list);
msg->cqe.done = is_read ? read_done : write_done; desc_buf += desc_buf_len;
msg->completion = &completion; }
/* concatenate work requests of rdma_rw_ctxs */
first_wr = NULL;
list_for_each_entry_reverse(msg, &msg_list, list) {
first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port, first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
&msg->cqe, NULL); &msg->cqe, first_wr);
}
ret = ib_post_send(t->qp, first_wr, NULL); ret = ib_post_send(t->qp, first_wr, NULL);
if (ret) { if (ret) {
pr_err("failed to post send wr: %d\n", ret); pr_err("failed to post send wr for RDMA R/W: %d\n", ret);
goto err; goto out;
} }
msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list);
wait_for_completion(&completion); wait_for_completion(&completion);
return 0; ret = msg->status;
out:
err: list_for_each_entry_safe(msg, next_msg, &msg_list, list) {
atomic_inc(&t->rw_avail_ops); list_del(&msg->list);
if (first_wr) smb_direct_free_rdma_rw_msg(t, msg,
rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
msg->sg_list, msg->sgt.nents,
is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); }
kfree(msg); atomic_add(credits_needed, &t->rw_credits);
wake_up(&t->wait_rw_credits);
return ret; return ret;
} }
static int smb_direct_rdma_write(struct ksmbd_transport *t, void *buf, static int smb_direct_rdma_write(struct ksmbd_transport *t,
unsigned int buflen, u32 remote_key, void *buf, unsigned int buflen,
u64 remote_offset, u32 remote_len) struct smb2_buffer_desc_v1 *desc,
unsigned int desc_len)
{ {
return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
remote_key, remote_offset, desc, desc_len, false);
remote_len, false);
} }
static int smb_direct_rdma_read(struct ksmbd_transport *t, void *buf, static int smb_direct_rdma_read(struct ksmbd_transport *t,
unsigned int buflen, u32 remote_key, void *buf, unsigned int buflen,
u64 remote_offset, u32 remote_len) struct smb2_buffer_desc_v1 *desc,
unsigned int desc_len)
{ {
return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
remote_key, remote_offset, desc, desc_len, true);
remote_len, true);
} }
static void smb_direct_disconnect(struct ksmbd_transport *t) static void smb_direct_disconnect(struct ksmbd_transport *t)
...@@ -1638,41 +1697,57 @@ static int smb_direct_prepare_negotiation(struct smb_direct_transport *t) ...@@ -1638,41 +1697,57 @@ static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
return ret; return ret;
} }
static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t)
{
return min_t(unsigned int,
t->cm_id->device->attrs.max_fast_reg_page_list_len,
256);
}
static int smb_direct_init_params(struct smb_direct_transport *t, static int smb_direct_init_params(struct smb_direct_transport *t,
struct ib_qp_cap *cap) struct ib_qp_cap *cap)
{ {
struct ib_device *device = t->cm_id->device; struct ib_device *device = t->cm_id->device;
int max_send_sges, max_pages, max_rw_wrs, max_send_wrs; int max_send_sges, max_rw_wrs, max_send_wrs;
unsigned int max_sge_per_wr, wrs_per_credit;
/* need 2 more sge. because a SMB_DIRECT header will be mapped, /* need 3 more sge. because a SMB_DIRECT header, SMB2 header,
* and maybe a send buffer could be not page aligned. * SMB2 response could be mapped.
*/ */
t->max_send_size = smb_direct_max_send_size; t->max_send_size = smb_direct_max_send_size;
max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 2; max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3;
if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) { if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
pr_err("max_send_size %d is too large\n", t->max_send_size); pr_err("max_send_size %d is too large\n", t->max_send_size);
return -EINVAL; return -EINVAL;
} }
/* /* Calculate the number of work requests for RDMA R/W.
* allow smb_direct_max_outstanding_rw_ops of in-flight RDMA * The maximum number of pages which can be registered
* read/writes. HCA guarantees at least max_send_sge of sges for * with one Memory region can be transferred with one
* a RDMA read/write work request, and if memory registration is used, * R/W credit. And at least 4 work requests for each credit
* we need reg_mr, local_inv wrs for each read/write. * are needed for MR registration, RDMA R/W, local & remote
* MR invalidation.
*/ */
t->max_rdma_rw_size = smb_direct_max_read_write_size; t->max_rdma_rw_size = smb_direct_max_read_write_size;
max_pages = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1; t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t);
max_rw_wrs = DIV_ROUND_UP(max_pages, SMB_DIRECT_MAX_SEND_SGES); t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size,
max_rw_wrs += rdma_rw_mr_factor(device, t->cm_id->port_num, (t->pages_per_rw_credit - 1) *
max_pages) * 2; PAGE_SIZE);
max_rw_wrs *= smb_direct_max_outstanding_rw_ops;
max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
device->attrs.max_sge_rd);
max_sge_per_wr = max_t(unsigned int, max_sge_per_wr,
max_send_sges);
wrs_per_credit = max_t(unsigned int, 4,
DIV_ROUND_UP(t->pages_per_rw_credit,
max_sge_per_wr) + 1);
max_rw_wrs = t->max_rw_credits * wrs_per_credit;
max_send_wrs = smb_direct_send_credit_target + max_rw_wrs; max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
if (max_send_wrs > device->attrs.max_cqe || if (max_send_wrs > device->attrs.max_cqe ||
max_send_wrs > device->attrs.max_qp_wr) { max_send_wrs > device->attrs.max_qp_wr) {
pr_err("consider lowering send_credit_target = %d, or max_outstanding_rw_ops = %d\n", pr_err("consider lowering send_credit_target = %d\n",
smb_direct_send_credit_target, smb_direct_send_credit_target);
smb_direct_max_outstanding_rw_ops);
pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
device->attrs.max_cqe, device->attrs.max_qp_wr); device->attrs.max_cqe, device->attrs.max_qp_wr);
return -EINVAL; return -EINVAL;
...@@ -1687,11 +1762,6 @@ static int smb_direct_init_params(struct smb_direct_transport *t, ...@@ -1687,11 +1762,6 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
return -EINVAL; return -EINVAL;
} }
if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) {
pr_err("warning: device max_send_sge = %d too small\n",
device->attrs.max_send_sge);
return -EINVAL;
}
if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) { if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
pr_err("warning: device max_recv_sge = %d too small\n", pr_err("warning: device max_recv_sge = %d too small\n",
device->attrs.max_recv_sge); device->attrs.max_recv_sge);
...@@ -1707,7 +1777,7 @@ static int smb_direct_init_params(struct smb_direct_transport *t, ...@@ -1707,7 +1777,7 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
t->send_credit_target = smb_direct_send_credit_target; t->send_credit_target = smb_direct_send_credit_target;
atomic_set(&t->send_credits, 0); atomic_set(&t->send_credits, 0);
atomic_set(&t->rw_avail_ops, smb_direct_max_outstanding_rw_ops); atomic_set(&t->rw_credits, t->max_rw_credits);
t->max_send_size = smb_direct_max_send_size; t->max_send_size = smb_direct_max_send_size;
t->max_recv_size = smb_direct_max_receive_size; t->max_recv_size = smb_direct_max_receive_size;
...@@ -1715,12 +1785,10 @@ static int smb_direct_init_params(struct smb_direct_transport *t, ...@@ -1715,12 +1785,10 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
cap->max_send_wr = max_send_wrs; cap->max_send_wr = max_send_wrs;
cap->max_recv_wr = t->recv_credit_max; cap->max_recv_wr = t->recv_credit_max;
cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES; cap->max_send_sge = max_sge_per_wr;
cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES; cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
cap->max_inline_data = 0; cap->max_inline_data = 0;
cap->max_rdma_ctxs = cap->max_rdma_ctxs = t->max_rw_credits;
rdma_rw_mr_factor(device, t->cm_id->port_num, max_pages) *
smb_direct_max_outstanding_rw_ops;
return 0; return 0;
} }
...@@ -1813,7 +1881,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, ...@@ -1813,7 +1881,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
} }
t->send_cq = ib_alloc_cq(t->cm_id->device, t, t->send_cq = ib_alloc_cq(t->cm_id->device, t,
t->send_credit_target, 0, IB_POLL_WORKQUEUE); smb_direct_send_credit_target + cap->max_rdma_ctxs,
0, IB_POLL_WORKQUEUE);
if (IS_ERR(t->send_cq)) { if (IS_ERR(t->send_cq)) {
pr_err("Can't create RDMA send CQ\n"); pr_err("Can't create RDMA send CQ\n");
ret = PTR_ERR(t->send_cq); ret = PTR_ERR(t->send_cq);
...@@ -1822,8 +1891,7 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, ...@@ -1822,8 +1891,7 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
} }
t->recv_cq = ib_alloc_cq(t->cm_id->device, t, t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
cap->max_send_wr + cap->max_rdma_ctxs, t->recv_credit_max, 0, IB_POLL_WORKQUEUE);
0, IB_POLL_WORKQUEUE);
if (IS_ERR(t->recv_cq)) { if (IS_ERR(t->recv_cq)) {
pr_err("Can't create RDMA recv CQ\n"); pr_err("Can't create RDMA recv CQ\n");
ret = PTR_ERR(t->recv_cq); ret = PTR_ERR(t->recv_cq);
...@@ -1852,17 +1920,12 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, ...@@ -1852,17 +1920,12 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1; pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) { if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
int pages_per_mr, mr_count; ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs,
t->max_rw_credits, IB_MR_TYPE_MEM_REG,
pages_per_mr = min_t(int, pages_per_rw, t->pages_per_rw_credit, 0);
t->cm_id->device->attrs.max_fast_reg_page_list_len);
mr_count = DIV_ROUND_UP(pages_per_rw, pages_per_mr) *
atomic_read(&t->rw_avail_ops);
ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, mr_count,
IB_MR_TYPE_MEM_REG, pages_per_mr, 0);
if (ret) { if (ret) {
pr_err("failed to init mr pool count %d pages %d\n", pr_err("failed to init mr pool count %d pages %d\n",
mr_count, pages_per_mr); t->max_rw_credits, t->pages_per_rw_credit);
goto err; goto err;
} }
} }
......
...@@ -7,6 +7,10 @@ ...@@ -7,6 +7,10 @@
#ifndef __KSMBD_TRANSPORT_RDMA_H__ #ifndef __KSMBD_TRANSPORT_RDMA_H__
#define __KSMBD_TRANSPORT_RDMA_H__ #define __KSMBD_TRANSPORT_RDMA_H__
#define SMBD_DEFAULT_IOSIZE (8 * 1024 * 1024)
#define SMBD_MIN_IOSIZE (512 * 1024)
#define SMBD_MAX_IOSIZE (16 * 1024 * 1024)
/* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */ /* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */
struct smb_direct_negotiate_req { struct smb_direct_negotiate_req {
__le16 min_version; __le16 min_version;
...@@ -52,10 +56,14 @@ struct smb_direct_data_transfer { ...@@ -52,10 +56,14 @@ struct smb_direct_data_transfer {
int ksmbd_rdma_init(void); int ksmbd_rdma_init(void);
void ksmbd_rdma_destroy(void); void ksmbd_rdma_destroy(void);
bool ksmbd_rdma_capable_netdev(struct net_device *netdev); bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
void init_smbd_max_io_size(unsigned int sz);
unsigned int get_smbd_max_read_write_size(void);
#else #else
static inline int ksmbd_rdma_init(void) { return 0; } static inline int ksmbd_rdma_init(void) { return 0; }
static inline int ksmbd_rdma_destroy(void) { return 0; } static inline int ksmbd_rdma_destroy(void) { return 0; }
static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; } static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; }
static inline void init_smbd_max_io_size(unsigned int sz) { }
static inline unsigned int get_smbd_max_read_write_size(void) { return 0; }
#endif #endif
#endif /* __KSMBD_TRANSPORT_RDMA_H__ */ #endif /* __KSMBD_TRANSPORT_RDMA_H__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment