Commit e0332629 authored by Harald Freudenberger's avatar Harald Freudenberger Committed by Vasily Gorbik

s390/ap/zcrypt: revisit ap and zcrypt error handling

Revisit the ap queue error handling: Based on discussions and
evaluatios with the firmware folk here is now a rework of the response
code handling for all the AP instructions. The idea is to distinguish
between failures because of some kind of invalid request where a retry
does not make any sense and a failure where another attempt to send
the very same request may succeed. The first case is handled by
returning EINVAL to the userspace application. The second case results
in retries within the zcrypt API controlled by a per message retry
counter.

Revisit the zcrpyt error handling: Similar here, based on discussions
with the firmware people here comes a rework of the handling of all
the reply codes.  Main point here is that there are only very few
cases left, where a zcrypt device queue is switched to offline. It
should never be the case that an AP reply message is 'unknown' to the
device driver as it indicates a total mismatch between device driver
and crypto card firmware. In all other cases, the code distinguishes
between failure because of invalid message (see above - EINVAL) or
failures of the infrastructure (see above - EAGAIN).
Signed-off-by: default avatarHarald Freudenberger <freude@linux.ibm.com>
Signed-off-by: default avatarVasily Gorbik <gor@linux.ibm.com>
parent 5caa2af9
......@@ -50,6 +50,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
#define AP_RESPONSE_NO_FIRST_PART 0x13
#define AP_RESPONSE_MESSAGE_TOO_BIG 0x15
#define AP_RESPONSE_REQ_FAC_NOT_INST 0x16
#define AP_RESPONSE_INVALID_DOMAIN 0x42
/*
* Known device types
......
......@@ -241,6 +241,9 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
case AP_RESPONSE_RESET_IN_PROGRESS:
aq->sm_state = AP_SM_STATE_RESET_WAIT;
return AP_SM_WAIT_TIMEOUT;
case AP_RESPONSE_INVALID_DOMAIN:
AP_DBF(DBF_WARN, "AP_RESPONSE_INVALID_DOMAIN on NQAP\n");
fallthrough;
case AP_RESPONSE_MESSAGE_TOO_BIG:
case AP_RESPONSE_REQ_FAC_NOT_INST:
list_del_init(&ap_msg->list);
......@@ -286,11 +289,6 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq)
aq->sm_state = AP_SM_STATE_RESET_WAIT;
aq->interrupt = AP_INTR_DISABLED;
return AP_SM_WAIT_TIMEOUT;
case AP_RESPONSE_BUSY:
return AP_SM_WAIT_TIMEOUT;
case AP_RESPONSE_Q_NOT_AVAIL:
case AP_RESPONSE_DECONFIGURED:
case AP_RESPONSE_CHECKSTOPPED:
default:
aq->dev_state = AP_DEV_STATE_ERROR;
aq->last_err_rc = status.response_code;
......
......@@ -21,6 +21,14 @@
#define ZCRYPT_DBF(...) \
debug_sprintf_event(zcrypt_dbf_info, ##__VA_ARGS__)
#define ZCRYPT_DBF_ERR(...) \
debug_sprintf_event(zcrypt_dbf_info, DBF_ERR, ##__VA_ARGS__)
#define ZCRYPT_DBF_WARN(...) \
debug_sprintf_event(zcrypt_dbf_info, DBF_WARN, ##__VA_ARGS__)
#define ZCRYPT_DBF_INFO(...) \
debug_sprintf_event(zcrypt_dbf_info, DBF_INFO, ##__VA_ARGS__)
#define ZCRYPT_DBF_DBG(...) \
debug_sprintf_event(zcrypt_dbf_info, DBF_DEBUG, ##__VA_ARGS__)
extern debug_info_t *zcrypt_dbf_info;
......
......@@ -52,7 +52,6 @@ struct error_hdr {
#define REP82_ERROR_INVALID_COMMAND 0x30
#define REP82_ERROR_MALFORMED_MSG 0x40
#define REP82_ERROR_INVALID_SPECIAL_CMD 0x41
#define REP82_ERROR_INVALID_DOMAIN_PRECHECK 0x42
#define REP82_ERROR_RESERVED_FIELDO 0x50 /* old value */
#define REP82_ERROR_WORD_ALIGNMENT 0x60
#define REP82_ERROR_MESSAGE_LENGTH 0x80
......@@ -67,7 +66,6 @@ struct error_hdr {
#define REP82_ERROR_ZERO_BUFFER_LEN 0xB0
#define REP88_ERROR_MODULE_FAILURE 0x10
#define REP88_ERROR_MESSAGE_TYPE 0x20
#define REP88_ERROR_MESSAGE_MALFORMD 0x22
#define REP88_ERROR_MESSAGE_LENGTH 0x23
......@@ -85,78 +83,56 @@ static inline int convert_error(struct zcrypt_queue *zq,
int queue = AP_QID_QUEUE(zq->queue->qid);
switch (ehdr->reply_code) {
case REP82_ERROR_OPERAND_INVALID:
case REP82_ERROR_OPERAND_SIZE:
case REP82_ERROR_EVEN_MOD_IN_OPND:
case REP88_ERROR_MESSAGE_MALFORMD:
case REP82_ERROR_INVALID_DOMAIN_PRECHECK:
case REP82_ERROR_INVALID_DOMAIN_PENDING:
case REP82_ERROR_INVALID_SPECIAL_CMD:
case REP82_ERROR_FILTERED_BY_HYPERVISOR:
// REP88_ERROR_INVALID_KEY // '82' CEX2A
// REP88_ERROR_OPERAND // '84' CEX2A
// REP88_ERROR_OPERAND_EVEN_MOD // '85' CEX2A
/* Invalid input data. */
case REP82_ERROR_INVALID_MSG_LEN: /* 0x23 */
case REP82_ERROR_RESERVD_FIELD: /* 0x24 */
case REP82_ERROR_FORMAT_FIELD: /* 0x29 */
case REP82_ERROR_MALFORMED_MSG: /* 0x40 */
case REP82_ERROR_INVALID_SPECIAL_CMD: /* 0x41 */
case REP82_ERROR_MESSAGE_LENGTH: /* 0x80 */
case REP82_ERROR_OPERAND_INVALID: /* 0x82 */
case REP82_ERROR_OPERAND_SIZE: /* 0x84 */
case REP82_ERROR_EVEN_MOD_IN_OPND: /* 0x85 */
case REP82_ERROR_INVALID_DOMAIN_PENDING: /* 0x8A */
case REP82_ERROR_FILTERED_BY_HYPERVISOR: /* 0x8B */
case REP82_ERROR_PACKET_TRUNCATED: /* 0xA0 */
case REP88_ERROR_MESSAGE_MALFORMD: /* 0x22 */
case REP88_ERROR_KEY_TYPE: /* 0x34 */
/* RY indicates malformed request */
ZCRYPT_DBF(DBF_WARN,
"device=%02x.%04x reply=0x%02x => rc=EINVAL\n",
"dev=%02x.%04x RY=0x%02x => rc=EINVAL\n",
card, queue, ehdr->reply_code);
return -EINVAL;
case REP82_ERROR_MESSAGE_TYPE:
// REP88_ERROR_MESSAGE_TYPE // '20' CEX2A
case REP82_ERROR_MACHINE_FAILURE: /* 0x10 */
case REP82_ERROR_MESSAGE_TYPE: /* 0x20 */
case REP82_ERROR_TRANSPORT_FAIL: /* 0x90 */
/*
* To sent a message of the wrong type is a bug in the
* device driver. Send error msg, disable the device
* and then repeat the request.
* Msg to wrong type or card/infrastructure failure.
* Trigger rescan of the ap bus, trigger retry request.
*/
atomic_set(&zcrypt_rescan_req, 1);
zq->online = 0;
pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
card, queue);
ZCRYPT_DBF(DBF_ERR,
"device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n",
card, queue, ehdr->reply_code);
return -EAGAIN;
case REP82_ERROR_TRANSPORT_FAIL:
/* Card or infrastructure failure, disable card */
atomic_set(&zcrypt_rescan_req, 1);
zq->online = 0;
pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
card, queue);
/* For type 86 response show the apfs value (failure reason) */
if (ehdr->type == TYPE86_RSP_CODE) {
if (ehdr->reply_code == REP82_ERROR_TRANSPORT_FAIL &&
ehdr->type == TYPE86_RSP_CODE) {
struct {
struct type86_hdr hdr;
struct type86_fmt2_ext fmt2;
} __packed * head = reply->msg;
unsigned int apfs = *((u32 *)head->fmt2.apfs);
ZCRYPT_DBF(DBF_ERR,
"device=%02x.%04x reply=0x%02x apfs=0x%x => online=0 rc=EAGAIN\n",
card, queue, apfs, ehdr->reply_code);
ZCRYPT_DBF(DBF_WARN,
"dev=%02x.%04x RY=0x%02x apfs=0x%x => bus rescan, rc=EAGAIN\n",
card, queue, ehdr->reply_code, apfs);
} else
ZCRYPT_DBF(DBF_ERR,
"device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n",
ZCRYPT_DBF(DBF_WARN,
"dev=%02x.%04x RY=0x%02x => bus rescan, rc=EAGAIN\n",
card, queue, ehdr->reply_code);
return -EAGAIN;
case REP82_ERROR_MACHINE_FAILURE:
// REP88_ERROR_MODULE_FAILURE // '10' CEX2A
/* If a card fails disable it and repeat the request. */
atomic_set(&zcrypt_rescan_req, 1);
zq->online = 0;
pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
card, queue);
ZCRYPT_DBF(DBF_ERR,
"device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n",
card, queue, ehdr->reply_code);
return -EAGAIN;
default:
zq->online = 0;
pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
card, queue);
ZCRYPT_DBF(DBF_ERR,
"device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n",
/* Assume request is valid and a retry will be worth it */
ZCRYPT_DBF(DBF_WARN,
"dev=%02x.%04x RY=0x%02x => rc=EAGAIN\n",
card, queue, ehdr->reply_code);
return -EAGAIN; /* repeat the request on a different device. */
return -EAGAIN;
}
}
......
......@@ -356,15 +356,15 @@ static int convert_type80(struct zcrypt_queue *zq,
if (t80h->len < sizeof(*t80h) + outputdatalength) {
/* The result is too short, the CEXxA card may not do that.. */
zq->online = 0;
pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
pr_err("Crypto dev=%02x.%04x code=0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid));
ZCRYPT_DBF(DBF_ERR,
"device=%02x.%04x code=0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
t80h->code);
return -EAGAIN; /* repeat the request on a different device. */
AP_QID_QUEUE(zq->queue->qid),
t80h->code);
ZCRYPT_DBF_ERR("dev=%02x.%04x code=0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
t80h->code);
return -EAGAIN;
}
if (zq->zcard->user_space_type == ZCRYPT_CEX2A)
BUG_ON(t80h->len > CEX2A_MAX_RESPONSE_SIZE);
......@@ -376,10 +376,10 @@ static int convert_type80(struct zcrypt_queue *zq,
return 0;
}
static int convert_response(struct zcrypt_queue *zq,
struct ap_message *reply,
char __user *outputdata,
unsigned int outputdatalength)
static int convert_response_cex2a(struct zcrypt_queue *zq,
struct ap_message *reply,
char __user *outputdata,
unsigned int outputdatalength)
{
/* Response type byte is the second byte in the response. */
unsigned char rtype = ((unsigned char *) reply->msg)[1];
......@@ -393,15 +393,15 @@ static int convert_response(struct zcrypt_queue *zq,
outputdata, outputdatalength);
default: /* Unknown response type, this should NEVER EVER happen */
zq->online = 0;
pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid));
ZCRYPT_DBF(DBF_ERR,
"device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(unsigned int) rtype);
return -EAGAIN; /* repeat the request on a different device. */
AP_QID_QUEUE(zq->queue->qid),
(int) rtype);
ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) rtype);
return -EAGAIN;
}
}
......@@ -478,8 +478,9 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq,
if (rc == 0) {
rc = ap_msg.rc;
if (rc == 0)
rc = convert_response(zq, &ap_msg, mex->outputdata,
mex->outputdatalength);
rc = convert_response_cex2a(zq, &ap_msg,
mex->outputdata,
mex->outputdatalength);
} else
/* Signal pending. */
ap_cancel_message(zq->queue, &ap_msg);
......@@ -524,8 +525,9 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq,
if (rc == 0) {
rc = ap_msg.rc;
if (rc == 0)
rc = convert_response(zq, &ap_msg, crt->outputdata,
crt->outputdatalength);
rc = convert_response_cex2a(zq, &ap_msg,
crt->outputdata,
crt->outputdatalength);
} else
/* Signal pending. */
ap_cancel_message(zq->queue, &ap_msg);
......
......@@ -650,23 +650,22 @@ static int convert_type86_ica(struct zcrypt_queue *zq,
(service_rc == 8 && service_rs == 72) ||
(service_rc == 8 && service_rs == 770) ||
(service_rc == 12 && service_rs == 769)) {
ZCRYPT_DBF(DBF_DEBUG,
"device=%02x.%04x rc/rs=%d/%d => rc=EINVAL\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) service_rc, (int) service_rs);
ZCRYPT_DBF_WARN("dev=%02x.%04x rc/rs=%d/%d => rc=EINVAL\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) service_rc, (int) service_rs);
return -EINVAL;
}
zq->online = 0;
pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
pr_err("Crypto dev=%02x.%04x rc/rs=%d/%d online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid));
ZCRYPT_DBF(DBF_ERR,
"device=%02x.%04x rc/rs=%d/%d => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) service_rc, (int) service_rs);
return -EAGAIN; /* repeat the request on a different device. */
AP_QID_QUEUE(zq->queue->qid),
(int) service_rc, (int) service_rs);
ZCRYPT_DBF_ERR("dev=%02x.%04x rc/rs=%d/%d => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) service_rc, (int) service_rs);
return -EAGAIN;
}
data = msg->text;
reply_len = msg->length - 2;
......@@ -800,17 +799,18 @@ static int convert_response_ica(struct zcrypt_queue *zq,
return convert_type86_ica(zq, reply,
outputdata, outputdatalength);
fallthrough; /* wrong cprb version is an unknown response */
default: /* Unknown response type, this should NEVER EVER happen */
default:
/* Unknown response type, this should NEVER EVER happen */
zq->online = 0;
pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid));
ZCRYPT_DBF(DBF_ERR,
"device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) msg->hdr.type);
return -EAGAIN; /* repeat the request on a different device. */
AP_QID_QUEUE(zq->queue->qid),
(int) msg->hdr.type);
ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) msg->hdr.type);
return -EAGAIN;
}
}
......@@ -836,15 +836,15 @@ static int convert_response_xcrb(bool userspace, struct zcrypt_queue *zq,
default: /* Unknown response type, this should NEVER EVER happen */
xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
zq->online = 0;
pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid));
ZCRYPT_DBF(DBF_ERR,
"device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) msg->hdr.type);
return -EAGAIN; /* repeat the request on a different device. */
AP_QID_QUEUE(zq->queue->qid),
(int) msg->hdr.type);
ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) msg->hdr.type);
return -EAGAIN;
}
}
......@@ -865,15 +865,15 @@ static int convert_response_ep11_xcrb(bool userspace, struct zcrypt_queue *zq,
fallthrough; /* wrong cprb version is an unknown resp */
default: /* Unknown response type, this should NEVER EVER happen */
zq->online = 0;
pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid));
ZCRYPT_DBF(DBF_ERR,
"device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) msg->hdr.type);
return -EAGAIN; /* repeat the request on a different device. */
AP_QID_QUEUE(zq->queue->qid),
(int) msg->hdr.type);
ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) msg->hdr.type);
return -EAGAIN;
}
}
......@@ -895,15 +895,15 @@ static int convert_response_rng(struct zcrypt_queue *zq,
fallthrough; /* wrong cprb version is an unknown response */
default: /* Unknown response type, this should NEVER EVER happen */
zq->online = 0;
pr_err("Cryptographic device %02x.%04x failed and was set offline\n",
pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid));
ZCRYPT_DBF(DBF_ERR,
"device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) msg->hdr.type);
return -EAGAIN; /* repeat the request on a different device. */
AP_QID_QUEUE(zq->queue->qid),
(int) msg->hdr.type);
ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n",
AP_QID_CARD(zq->queue->qid),
AP_QID_QUEUE(zq->queue->qid),
(int) msg->hdr.type);
return -EAGAIN;
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment