Commit f52862f4 authored by Brian King, committed by Benjamin Herrenschmidt

powerpc/pseries: Fix partition migration hang under load

While testing partition migration with heavy CPU load using
shared processors, it was observed that sometimes the migration
would never complete and would appear to hang. Currently, the
migration code assumes that if H_SUCCESS is returned from the H_JOIN
then the migration is complete and the processor is waking up on
the target system. If there was an outstanding PROD to the processor
when the H_JOIN is called, however, it will return H_SUCCESS on the source
system, causing the migration to hang or, in some scenarios, causing
the kernel to crash in the complete() call that wakes the caller
of rtas_percpu_suspend_me. Fix this by calling H_JOIN multiple times
if necessary during the migration.
Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent 448e2ca0
...@@ -46,6 +46,7 @@ EXPORT_SYMBOL(rtas); ...@@ -46,6 +46,7 @@ EXPORT_SYMBOL(rtas);
struct rtas_suspend_me_data { struct rtas_suspend_me_data {
atomic_t working; /* number of cpus accessing this struct */ atomic_t working; /* number of cpus accessing this struct */
atomic_t done;
int token; /* ibm,suspend-me */ int token; /* ibm,suspend-me */
int error; int error;
struct completion *complete; /* wait on this until working == 0 */ struct completion *complete; /* wait on this until working == 0 */
...@@ -689,7 +690,7 @@ static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; ...@@ -689,7 +690,7 @@ static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
#ifdef CONFIG_PPC_PSERIES #ifdef CONFIG_PPC_PSERIES
static void rtas_percpu_suspend_me(void *info) static void rtas_percpu_suspend_me(void *info)
{ {
long rc; long rc = H_SUCCESS;
unsigned long msr_save; unsigned long msr_save;
int cpu; int cpu;
struct rtas_suspend_me_data *data = struct rtas_suspend_me_data *data =
...@@ -701,7 +702,8 @@ static void rtas_percpu_suspend_me(void *info) ...@@ -701,7 +702,8 @@ static void rtas_percpu_suspend_me(void *info)
msr_save = mfmsr(); msr_save = mfmsr();
mtmsr(msr_save & ~(MSR_EE)); mtmsr(msr_save & ~(MSR_EE));
rc = plpar_hcall_norets(H_JOIN); while (rc == H_SUCCESS && !atomic_read(&data->done))
rc = plpar_hcall_norets(H_JOIN);
mtmsr(msr_save); mtmsr(msr_save);
...@@ -724,6 +726,9 @@ static void rtas_percpu_suspend_me(void *info) ...@@ -724,6 +726,9 @@ static void rtas_percpu_suspend_me(void *info)
smp_processor_id(), rc); smp_processor_id(), rc);
data->error = rc; data->error = rc;
} }
atomic_set(&data->done, 1);
/* This cpu did the suspend or got an error; in either case, /* This cpu did the suspend or got an error; in either case,
* we need to prod all other other cpus out of join state. * we need to prod all other other cpus out of join state.
* Extra prods are harmless. * Extra prods are harmless.
...@@ -766,6 +771,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args) ...@@ -766,6 +771,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args)
} }
atomic_set(&data.working, 0); atomic_set(&data.working, 0);
atomic_set(&data.done, 0);
data.token = rtas_token("ibm,suspend-me"); data.token = rtas_token("ibm,suspend-me");
data.error = 0; data.error = 0;
data.complete = &done; data.complete = &done;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment