Commit 6f6c7006 authored by Sage Weil

libceph: fix osd request queuing on osdmap updates

If we send a request to osd A, and the request's pg remaps to osd B and
then back to A in quick succession, we need to resend the request to A. The
old code was only calling kick_requests after processing all incremental
maps in a message, so it was very possible to not resend a request that
needed to be resent.  This would make the osd eventually time out (at least
with the current default of osd timeouts enabled).

The correct approach is to scan requests on every map incremental.  This
patch refactors the kick code in a few ways:
 - all requests are either on req_lru (in flight), req_unsent (ready to
   send), or req_notarget (currently map to no up osd)
 - mapping is always done by map_request (previously map_osds)
 - if the mapping changes, we requeue.  requests are resent only after all
   map incrementals are processed.
 - some osd reset code is moved out of kick_requests into a separate
   function
 - the "kick this osd" functionality is moved to kick_osd_requests, as it
   is unrelated to scanning for request->pg->osd mapping changes
Signed-off-by: Sage Weil <sage@newdream.net>
parent 09adc80c
...@@ -74,7 +74,6 @@ struct ceph_osd_request { ...@@ -74,7 +74,6 @@ struct ceph_osd_request {
char r_oid[40]; /* object name */ char r_oid[40]; /* object name */
int r_oid_len; int r_oid_len;
unsigned long r_stamp; /* send OR check time */ unsigned long r_stamp; /* send OR check time */
bool r_resend; /* msg send failed, needs retry */
struct ceph_file_layout r_file_layout; struct ceph_file_layout r_file_layout;
struct ceph_snap_context *r_snapc; /* snap context for writes */ struct ceph_snap_context *r_snapc; /* snap context for writes */
...@@ -104,7 +103,9 @@ struct ceph_osd_client { ...@@ -104,7 +103,9 @@ struct ceph_osd_client {
u64 timeout_tid; /* tid of timeout triggering rq */ u64 timeout_tid; /* tid of timeout triggering rq */
u64 last_tid; /* tid of last request */ u64 last_tid; /* tid of last request */
struct rb_root requests; /* pending requests */ struct rb_root requests; /* pending requests */
struct list_head req_lru; /* pending requests lru */ struct list_head req_lru; /* in-flight lru */
struct list_head req_unsent; /* unsent/need-resend queue */
struct list_head req_notarget; /* map to no osd */
int num_requests; int num_requests;
struct delayed_work timeout_work; struct delayed_work timeout_work;
struct delayed_work osds_timeout_work; struct delayed_work osds_timeout_work;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment