Commit df9f86fa authored by Sage Weil

ceph: fix small seq message skipping

If the client gets out of sync with the server message sequence number, we
normally skip low seq messages (ones we already received).  The skip code
was also incrementing the expected seq, so that all subsequent messages
also appeared old and got skipped, eventually causing a timeout on the osd
connection.  This resulted in some lagging requests and console messages
like

[233480.882885] ceph: skipping osd22 10.138.138.13:6804 seq 2016, expected 2017
[233480.882919] ceph: skipping osd22 10.138.138.13:6804 seq 2017, expected 2018
[233480.882963] ceph: skipping osd22 10.138.138.13:6804 seq 2018, expected 2019
[233480.883488] ceph: skipping osd22 10.138.138.13:6804 seq 2019, expected 2020
[233485.219558] ceph: skipping osd22 10.138.138.13:6804 seq 2020, expected 2021
[233485.906595] ceph: skipping osd22 10.138.138.13:6804 seq 2021, expected 2022
[233490.379536] ceph: skipping osd22 10.138.138.13:6804 seq 2022, expected 2023
[233495.523260] ceph: skipping osd22 10.138.138.13:6804 seq 2023, expected 2024
[233495.923194] ceph: skipping osd22 10.138.138.13:6804 seq 2024, expected 2025
[233500.534614] ceph:  tid 6023602 timed out on osd22, will reset osd
Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Sage Weil <sage@newdream.net>
parent 2f56f56a
@@ -1532,14 +1532,13 @@ static int read_partial_message(struct ceph_connection *con)
 	/* verify seq# */
 	seq = le64_to_cpu(con->in_hdr.seq);
 	if ((s64)seq - (s64)con->in_seq < 1) {
-		pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
+		pr_info("skipping %s%lld %s seq %lld expected %lld\n",
 			ENTITY_NAME(con->peer_name),
 			ceph_pr_addr(&con->peer_addr.in_addr),
 			seq, con->in_seq + 1);
 		con->in_base_pos = -front_len - middle_len - data_len -
 			sizeof(m->footer);
 		con->in_tag = CEPH_MSGR_TAG_READY;
-		con->in_seq++;
 		return 0;
 	} else if ((s64)seq - (s64)con->in_seq > 1) {
 		pr_err("read_partial_message bad seq %lld expected %lld\n",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment