Commit a02a946d authored by Ilya Dryomov

libceph: respect RADOS_BACKOFF backoffs

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent 76f827a7
......@@ -147,6 +147,7 @@ struct ceph_dir_layout {
/* message type codes; ceph_msg_type_name() returns a printable name for each */
#define CEPH_MSG_OSD_OP 42
#define CEPH_MSG_OSD_OPREPLY 43
#define CEPH_MSG_WATCH_NOTIFY 44
#define CEPH_MSG_OSD_BACKOFF 61
/* watch-notify operations */
......
#ifndef _FS_CEPH_OSD_CLIENT_H
#define _FS_CEPH_OSD_CLIENT_H
#include <linux/bitrev.h>
#include <linux/completion.h>
#include <linux/kref.h>
#include <linux/mempool.h>
......@@ -36,6 +37,8 @@ struct ceph_osd {
struct ceph_connection o_con;
struct rb_root o_requests;
struct rb_root o_linger_requests;
struct rb_root o_backoff_mappings;
struct rb_root o_backoffs_by_id;
struct list_head o_osd_lru;
struct ceph_auth_handshake o_auth;
unsigned long lru_ttl;
......@@ -275,6 +278,48 @@ struct ceph_watch_item {
struct ceph_entity_addr addr;
};
/*
 * Pairs a spgid with its own rbtree of backoffs.  The embedded rb_node
 * lets the mapping itself be a member of an rbtree.
 *
 * NOTE(review): presumably these live in ceph_osd::o_backoff_mappings and
 * @backoffs holds ceph_osd_backoff entries via their spg_node -- the
 * insert/lookup code is not visible here, confirm.
 */
struct ceph_spg_mapping {
struct rb_node node;
struct ceph_spg spgid;
struct rb_root backoffs;
};
/*
 * Object id used for the begin/end bounds of a backoff (see struct
 * ceph_osd_backoff).
 *
 * key, oid and nspace are length-counted byte buffers: dump_hoid() walks
 * exactly *_len bytes of each and does not depend on NUL termination.
 */
struct ceph_hobject_id {
void *key;
size_t key_len;
void *oid;
size_t oid_len;
u64 snapid; /* CEPH_NOSNAP / CEPH_SNAPDIR are printed symbolically */
u32 hash;
u8 is_max; /* nonzero: dump_hoid() prints just "MAX" */
void *nspace;
size_t nspace_len;
s64 pool; /* S64_MIN with snapid == 0, hash == 0, !is_max prints as "MIN" */
/* cache */
u32 hash_reverse_bits; /* bitrev32(hash), set by ceph_hoid_build_hash_cache() */
};
/*
 * Recompute the cached bit-reversed hash: stores bitrev32(hoid->hash)
 * into hoid->hash_reverse_bits.  Reads only hoid->hash, so it has to be
 * called again whenever that field changes.
 */
static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
{
hoid->hash_reverse_bits = bitrev32(hoid->hash);
}
/*
 * PG-wide backoff: [begin, end)
 * per-object backoff: begin == end
 *
 * id_node is the linkage used by ceph_osd::o_backoffs_by_id (that tree is
 * walked through id_node in dump_backoffs()).
 *
 * NOTE(review): spg_node presumably links the backoff into
 * ceph_spg_mapping::backoffs, and begin/end look like owned allocations --
 * neither the insertion nor the free path is visible here, confirm.
 */
struct ceph_osd_backoff {
struct rb_node spg_node;
struct rb_node id_node;
struct ceph_spg spgid;
u64 id;
struct ceph_hobject_id *begin;
struct ceph_hobject_id *end;
};
#define CEPH_LINGER_ID_START 0xffff000000000000ULL
struct ceph_osd_client {
......
......@@ -32,6 +32,7 @@ struct ceph_spg {
};
int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs);
int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
#define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id
together */
......
......@@ -439,6 +439,12 @@ enum {
const char *ceph_osd_watch_op_name(int o);
/*
 * Backoff operation codes.
 *
 * NOTE(review): presumably the op carried in a CEPH_MSG_OSD_BACKOFF
 * message (BLOCK/UNBLOCK from the OSD, ACK_BLOCK from the client) -- the
 * message handling code is not visible here, confirm.
 */
enum {
CEPH_OSD_BACKOFF_OP_BLOCK = 1,
CEPH_OSD_BACKOFF_OP_ACK_BLOCK = 2,
CEPH_OSD_BACKOFF_OP_UNBLOCK = 3,
};
/*
* an individual object operation. each may be accompanied by some data
* payload
......
......@@ -85,6 +85,7 @@ const char *ceph_msg_type_name(int type)
case CEPH_MSG_OSD_OP: return "osd_op";
case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
case CEPH_MSG_OSD_BACKOFF: return "osd_backoff";
default: return "unknown";
}
}
......
......@@ -243,6 +243,73 @@ static void dump_linger_requests(struct seq_file *s, struct ceph_osd *osd)
mutex_unlock(&osd->lock);
}
/*
 * Print a snapshot id.  The two reserved values are shown symbolically
 * ("head" for CEPH_NOSNAP, "snapdir" for CEPH_SNAPDIR); any other id is
 * printed as bare lowercase hex.
 */
static void dump_snapid(struct seq_file *s, u64 snapid)
{
	if (snapid == CEPH_NOSNAP) {
		seq_puts(s, "head");
		return;
	}

	if (snapid == CEPH_SNAPDIR) {
		seq_puts(s, "snapdir");
		return;
	}

	seq_printf(s, "%llx", snapid);
}
/*
 * Write @len bytes of @name to @s.  A byte goes out unchanged only if it
 * is in the range [32, 127) and is not one of '%', ':' or '/'; every
 * other byte is emitted as '%' followed by two lowercase hex digits.
 */
static void dump_name_escaped(struct seq_file *s, unsigned char *name,
			      size_t len)
{
	size_t pos = 0;

	while (pos < len) {
		const unsigned char c = name[pos++];
		const int plain = c >= 32 && c < 127 &&
				  c != '%' && c != ':' && c != '/';

		if (plain)
			seq_putc(s, c);
		else
			seq_printf(s, "%%%02x", c);
	}
}
/*
 * Print a hobject id.
 *
 * Two sentinels are printed by name: "MAX" when is_max is set, and "MIN"
 * when is_max is clear and snapid == 0, hash == 0 and pool == S64_MIN.
 * Anything else is printed as
 *
 *	pool:reversed-hash:nspace:key:oid:snapid
 *
 * with the three names escaped by dump_name_escaped().
 */
static void dump_hoid(struct seq_file *s, const struct ceph_hobject_id *hoid)
{
	if (hoid->is_max) {
		seq_puts(s, "MAX");
		return;
	}

	/* is_max is known to be clear from here on */
	if (hoid->pool == S64_MIN && hoid->snapid == 0 && hoid->hash == 0) {
		seq_puts(s, "MIN");
		return;
	}

	seq_printf(s, "%lld:%08x:", hoid->pool, hoid->hash_reverse_bits);

	dump_name_escaped(s, hoid->nspace, hoid->nspace_len);
	seq_putc(s, ':');

	dump_name_escaped(s, hoid->key, hoid->key_len);
	seq_putc(s, ':');

	dump_name_escaped(s, hoid->oid, hoid->oid_len);
	seq_putc(s, ':');

	dump_snapid(s, hoid->snapid);
}
/*
 * Print every backoff registered on @osd, one per line, walking
 * osd->o_backoffs_by_id in tree order with osd->lock held:
 *
 *	osd<N> \t <spgid> \t <id> \t <begin> \t <end> \n
 */
static void dump_backoffs(struct seq_file *s, struct ceph_osd *osd)
{
	struct rb_node *pos;

	mutex_lock(&osd->lock);

	pos = rb_first(&osd->o_backoffs_by_id);
	while (pos) {
		struct ceph_osd_backoff *b =
		    rb_entry(pos, struct ceph_osd_backoff, id_node);

		seq_printf(s, "osd%d\t", osd->o_osd);
		dump_spgid(s, &b->spgid);
		seq_printf(s, "\t%llu\t", b->id);
		dump_hoid(s, b->begin);
		seq_putc(s, '\t');
		dump_hoid(s, b->end);
		seq_putc(s, '\n');

		pos = rb_next(pos);
	}

	mutex_unlock(&osd->lock);
}
static int osdc_show(struct seq_file *s, void *pp)
{
struct ceph_client *client = s->private;
......@@ -268,6 +335,13 @@ static int osdc_show(struct seq_file *s, void *pp)
}
dump_linger_requests(s, &osdc->homeless_osd);
seq_puts(s, "BACKOFFS\n");
for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
dump_backoffs(s, osd);
}
up_read(&osdc->lock);
return 0;
}
......
This diff is collapsed.
......@@ -418,6 +418,22 @@ int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs)
return 0;
}
/*
 * Three-way comparison of two spgids: the pgid decides first (whatever
 * non-zero value ceph_pg_compare() yields is returned as is); on a tie
 * the shard decides, giving -1, 0 or 1.
 */
int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs)
{
	int pg_order = ceph_pg_compare(&lhs->pgid, &rhs->pgid);

	if (pg_order != 0)
		return pg_order;

	if (lhs->shard == rhs->shard)
		return 0;

	return lhs->shard < rhs->shard ? -1 : 1;
}
/*
* rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
* to a set of osds) and primary_temp (explicit primary setting)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment