Commit 153a008b authored by Sage Weil

ceph: reset osd connections after fault

After a single osd connection fault (e.g. a tcp disconnect), the
connection wasn't being reopened, which caused all current and future
requests for that osd to hang.
Signed-off-by: Sage Weil <sage@newdream.net>
parent 6c5d1a49
...@@ -369,7 +369,6 @@ static void osd_reset(struct ceph_connection *con) ...@@ -369,7 +369,6 @@ static void osd_reset(struct ceph_connection *con)
return; return;
dout("osd_reset osd%d\n", osd->o_osd); dout("osd_reset osd%d\n", osd->o_osd);
osdc = osd->o_osdc; osdc = osd->o_osdc;
osd->o_incarnation++;
down_read(&osdc->map_sem); down_read(&osdc->map_sem);
kick_requests(osdc, osd); kick_requests(osdc, osd);
up_read(&osdc->map_sem); up_read(&osdc->map_sem);
...@@ -921,7 +920,9 @@ static void kick_requests(struct ceph_osd_client *osdc, ...@@ -921,7 +920,9 @@ static void kick_requests(struct ceph_osd_client *osdc,
dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
mutex_lock(&osdc->request_mutex); mutex_lock(&osdc->request_mutex);
if (!kickosd) { if (kickosd) {
__reset_osd(osdc, kickosd);
} else {
for (p = rb_first(&osdc->osds); p; p = n) { for (p = rb_first(&osdc->osds); p; p = n) {
struct ceph_osd *osd = struct ceph_osd *osd =
rb_entry(p, struct ceph_osd, o_node); rb_entry(p, struct ceph_osd, o_node);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment