Commit 33c12a54, authored by Sunil Mushran and committed by Joel Becker

ocfs2/cluster: Heartbeat mismatch message improved

If o2hb finds unexpected values in the heartbeat slot, it prints the message:
"ERROR: Device "dm-6": another node is heartbeating in our slot!"

This message could be misleading. This patch adds two more messages to
help users better diagnose the problem.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <jlbec@evilplan.org>
parent 76d9fc29
...@@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg, ...@@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
/* We want to make sure that nobody is heartbeating on top of us -- /* We want to make sure that nobody is heartbeating on top of us --
* this will help detect an invalid configuration. */ * this will help detect an invalid configuration. */
static int o2hb_check_last_timestamp(struct o2hb_region *reg) static void o2hb_check_last_timestamp(struct o2hb_region *reg)
{ {
int node_num, ret;
struct o2hb_disk_slot *slot; struct o2hb_disk_slot *slot;
struct o2hb_disk_heartbeat_block *hb_block; struct o2hb_disk_heartbeat_block *hb_block;
char *errstr;
node_num = o2nm_this_node(); slot = &reg->hr_slots[o2nm_this_node()];
ret = 1;
slot = &reg->hr_slots[node_num];
/* Don't check on our 1st timestamp */ /* Don't check on our 1st timestamp */
if (slot->ds_last_time) { if (!slot->ds_last_time)
hb_block = slot->ds_raw_block; return;
if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time) hb_block = slot->ds_raw_block;
ret = 0; if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time &&
} le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation &&
hb_block->hb_node == slot->ds_node_num)
return;
return ret; #define ERRSTR1 "Another node is heartbeating on device"
#define ERRSTR2 "Heartbeat generation mismatch on device"
#define ERRSTR3 "Heartbeat sequence mismatch on device"
if (hb_block->hb_node != slot->ds_node_num)
errstr = ERRSTR1;
else if (le64_to_cpu(hb_block->hb_generation) !=
slot->ds_last_generation)
errstr = ERRSTR2;
else
errstr = ERRSTR3;
mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), "
"ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name,
slot->ds_node_num, (unsigned long long)slot->ds_last_generation,
(unsigned long long)slot->ds_last_time, hb_block->hb_node,
(unsigned long long)le64_to_cpu(hb_block->hb_generation),
(unsigned long long)le64_to_cpu(hb_block->hb_seq));
} }
static inline void o2hb_prepare_block(struct o2hb_region *reg, static inline void o2hb_prepare_block(struct o2hb_region *reg,
...@@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) ...@@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
/* With an up to date view of the slots, we can check that no /* With an up to date view of the slots, we can check that no
* other node has been improperly configured to heartbeat in * other node has been improperly configured to heartbeat in
* our slot. */ * our slot. */
if (!o2hb_check_last_timestamp(reg)) o2hb_check_last_timestamp(reg);
mlog(ML_ERROR, "Device \"%s\": another node is heartbeating "
"in our slot!\n", reg->hr_dev_name);
/* fill in the proper info for our next heartbeat */ /* fill in the proper info for our next heartbeat */
o2hb_prepare_block(reg, reg->hr_generation); o2hb_prepare_block(reg, reg->hr_generation);
...@@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) ...@@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
} }
i = -1; i = -1;
while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { while((i = find_next_bit(configured_nodes,
O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
change |= o2hb_check_slot(reg, &reg->hr_slots[i]); change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment