Commit 23776f53 authored by unknown's avatar unknown

Fix for the following bugs:

  - BUG#22306: STOP INSTANCE cannot be applied to instances in the Crashed,
    Failed and Abandoned states;
  - BUG#23476: DROP INSTANCE does not work
  - BUG#23215: STOP INSTANCE takes too much time

BUG#22306:
The problem was that STOP INSTANCE checked that mysqld was up and running.
If it was not, STOP INSTANCE reported an error. Now, STOP INSTANCE reports
an error only if the instance has not been started (mysqld itself can be down).

BUG#23476:
The problem was that DROP INSTANCE tried to stop an inactive instance. The fix
is trivial.

BUG#23215:
The problem was that locks were not acquired properly, so the
instance-monitoring thread could not acquire the mutex held by the
query-processing thread.

The fix is to simplify the locking scheme by moving instance-related
information out of the Guardian class and into the Instance class. This removes
the need to store a separate list of instance information in Guardian and to
keep it synchronized with the original list in Instance_map.


server-tools/instance-manager/commands.cc:
  1. Introduce Instance_cmd class -- base class for the commands
     that deal with a single instance;
  2. Remove Instance_map argument from command constructors;
  3. Ensure that Instance Map and Instance are locked in the proper order;
  4. Polishing.
server-tools/instance-manager/commands.h:
  1. Introduce Instance_cmd class -- base class for the commands
     that deal with a single instance;
  2. Remove Instance_map argument from command constructors;
  3. Polishing.
server-tools/instance-manager/guardian.cc:
  1. Move "extended" instance information to the Instance class.
     This removes the need to store an instance-related container and data
     in the Guardian class, which significantly simplifies the locking scheme.
  2. Polishing.
server-tools/instance-manager/guardian.h:
  1. Move "extended" instance information to the Instance class.
     This removes the need to store an instance-related container and data
     in the Guardian class, which significantly simplifies the locking scheme.
  2. Polishing.
server-tools/instance-manager/instance.cc:
  1. Move "extended" instance information to the Instance-class.
  2. Introduce new state STOPPED to mark that guarded instance
     is stopped and should not be restarted by Guardian.
  3. Polishing.
server-tools/instance-manager/instance.h:
  1. Move "extended" instance information to the Instance-class.
  2. Introduce new state STOPPED to mark that guarded instance
     is stopped and should not be restarted by Guardian.
  3. Polishing.
server-tools/instance-manager/instance_map.cc:
  1. Move flush_instances() from Instance_map to Manager.
  2. Polishing.
server-tools/instance-manager/instance_map.h:
  1. Move flush_instances() from Instance_map to Manager.
  2. Polishing.
server-tools/instance-manager/instance_options.h:
  Polishing.
server-tools/instance-manager/manager.cc:
  1. Move flush_instances() from Instance_map to Manager.
  2. Polishing.
server-tools/instance-manager/manager.h:
  1. Move flush_instances() from Instance_map to Manager.
  2. Polishing.
server-tools/instance-manager/user_map.cc:
  Polishing.
parent 6949b042
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "guardian.h" #include "guardian.h"
#include "instance_map.h" #include "instance_map.h"
#include "log.h" #include "log.h"
#include "manager.h"
#include "messages.h" #include "messages.h"
#include "mysqld_error.h" #include "mysqld_error.h"
#include "mysql_manager_error.h" #include "mysql_manager_error.h"
...@@ -36,8 +37,11 @@ ...@@ -36,8 +37,11 @@
#include "priv.h" #include "priv.h"
#include "protocol.h" #include "protocol.h"
/**************************************************************************
{{{ Static functions.
**************************************************************************/
/* /**
modify_defaults_to_im_error -- a map of error codes of modify_defaults_to_im_error -- a map of error codes of
mysys::modify_defaults_file() into Instance Manager error codes. mysys::modify_defaults_file() into Instance Manager error codes.
*/ */
...@@ -46,38 +50,25 @@ static const int modify_defaults_to_im_error[]= { 0, ER_OUT_OF_RESOURCES, ...@@ -46,38 +50,25 @@ static const int modify_defaults_to_im_error[]= { 0, ER_OUT_OF_RESOURCES,
ER_ACCESS_OPTION_FILE }; ER_ACCESS_OPTION_FILE };
/* /**
Add a string to a buffer. Parse version number from the version string.
SYNOPSIS SYNOPSIS
put_to_buff() parse_version_number()
buff buffer to add the string version_str
str string to add version
position offset in the buff to add a string version_size
DESCRIPTION DESCRIPTION
TODO
Function to add a string to the buffer. It is different from TODO: Move this function to Instance_options and parse version number
store_to_protocol_packet, which is used in the protocol.cc. only once.
The last one also stores the length of the string in a special way.
This is required for MySQL client/server protocol support only.
RETURN NOTE: This function is used only in SHOW INSTANCE STATUS statement at the
0 - ok moment.
1 - error occured
*/ */
static inline int put_to_buff(Buffer *buff, const char *str, uint *position)
{
uint len= strlen(str);
if (buff->append(*position, str, len))
return 1;
*position+= len;
return 0;
}
static int parse_version_number(const char *version_str, char *version, static int parse_version_number(const char *version_str, char *version,
uint version_size) uint version_size)
{ {
...@@ -102,6 +93,9 @@ static int parse_version_number(const char *version_str, char *version, ...@@ -102,6 +93,9 @@ static int parse_version_number(const char *version_str, char *version,
return 0; return 0;
} }
/**************************************************************************
}}}
**************************************************************************/
/************************************************************************** /**************************************************************************
Implementation of Instance_name. Implementation of Instance_name.
...@@ -122,7 +116,7 @@ Instance_name::Instance_name(const LEX_STRING *name) ...@@ -122,7 +116,7 @@ Instance_name::Instance_name(const LEX_STRING *name)
Implementation of Show_instances. Implementation of Show_instances.
**************************************************************************/ **************************************************************************/
/* /**
Implementation of SHOW INSTANCES statement. Implementation of SHOW INSTANCES statement.
Possible error codes: Possible error codes:
...@@ -172,7 +166,6 @@ int Show_instances::write_data(st_net *net) ...@@ -172,7 +166,6 @@ int Show_instances::write_data(st_net *net)
Instance *instance; Instance *instance;
Instance_map::Iterator iterator(instance_map); Instance_map::Iterator iterator(instance_map);
instance_map->guardian->lock();
instance_map->lock(); instance_map->lock();
while ((instance= iterator.next())) while ((instance= iterator.next()))
...@@ -180,20 +173,25 @@ int Show_instances::write_data(st_net *net) ...@@ -180,20 +173,25 @@ int Show_instances::write_data(st_net *net)
Buffer send_buf; /* buffer for packets */ Buffer send_buf; /* buffer for packets */
uint pos= 0; uint pos= 0;
instance->lock();
const char *instance_name= instance->options.instance_name.str; const char *instance_name= instance->options.instance_name.str;
const char *state_name= instance_map->get_instance_state_name(instance); const char *state_name= instance->get_state_name();
if (store_to_protocol_packet(&send_buf, instance_name, &pos) || if (store_to_protocol_packet(&send_buf, instance_name, &pos) ||
store_to_protocol_packet(&send_buf, state_name, &pos) || store_to_protocol_packet(&send_buf, state_name, &pos) ||
my_net_write(net, send_buf.buffer, pos)) my_net_write(net, send_buf.buffer, pos))
{ {
err_status= TRUE; err_status= TRUE;
break;
} }
instance->unlock();
if (err_status)
break;
} }
instance_map->unlock(); instance_map->unlock();
instance_map->guardian->unlock();
return err_status ? ER_OUT_OF_RESOURCES : 0; return err_status ? ER_OUT_OF_RESOURCES : 0;
} }
...@@ -203,7 +201,7 @@ int Show_instances::write_data(st_net *net) ...@@ -203,7 +201,7 @@ int Show_instances::write_data(st_net *net)
Implementation of Flush_instances. Implementation of Flush_instances.
**************************************************************************/ **************************************************************************/
/* /**
Implementation of FLUSH INSTANCES statement. Implementation of FLUSH INSTANCES statement.
Possible error codes: Possible error codes:
...@@ -213,36 +211,19 @@ int Show_instances::write_data(st_net *net) ...@@ -213,36 +211,19 @@ int Show_instances::write_data(st_net *net)
int Flush_instances::execute(st_net *net, ulong connection_id) int Flush_instances::execute(st_net *net, ulong connection_id)
{ {
instance_map->guardian->lock(); if (Manager::flush_instances())
instance_map->lock();
if (instance_map->is_there_active_instance())
{
instance_map->unlock();
instance_map->guardian->unlock();
return ER_THERE_IS_ACTIVE_INSTACE;
}
if (instance_map->flush_instances())
{
instance_map->unlock();
instance_map->guardian->unlock();
return ER_OUT_OF_RESOURCES; return ER_OUT_OF_RESOURCES;
}
instance_map->unlock();
instance_map->guardian->unlock();
return net_send_ok(net, connection_id, NULL) ? ER_OUT_OF_RESOURCES : 0; return net_send_ok(net, connection_id, NULL) ? ER_OUT_OF_RESOURCES : 0;
} }
/************************************************************************** /**************************************************************************
Implementation of Abstract_instance_cmd. Implementation of Instance_cmd.
**************************************************************************/ **************************************************************************/
Abstract_instance_cmd::Abstract_instance_cmd(const LEX_STRING *instance_name_arg) Instance_cmd::Instance_cmd(const LEX_STRING *instance_name_arg):
:instance_name(instance_name_arg) instance_name(instance_name_arg)
{ {
/* /*
MT-NOTE: we can not make a search for Instance object here, MT-NOTE: we can not make a search for Instance object here,
...@@ -251,14 +232,25 @@ Abstract_instance_cmd::Abstract_instance_cmd(const LEX_STRING *instance_name_arg ...@@ -251,14 +232,25 @@ Abstract_instance_cmd::Abstract_instance_cmd(const LEX_STRING *instance_name_arg
} }
/**************************************************************************
Implementation of Abstract_instance_cmd.
**************************************************************************/
Abstract_instance_cmd::Abstract_instance_cmd(
const LEX_STRING *instance_name_arg)
:Instance_cmd(instance_name_arg)
{
}
int Abstract_instance_cmd::execute(st_net *net, ulong connection_id) int Abstract_instance_cmd::execute(st_net *net, ulong connection_id)
{ {
int err_code; int err_code;
Instance *instance;
instance_map->lock(); instance_map->lock();
{ instance= instance_map->find(get_instance_name());
Instance *instance= instance_map->find(get_instance_name());
if (!instance) if (!instance)
{ {
...@@ -266,10 +258,12 @@ int Abstract_instance_cmd::execute(st_net *net, ulong connection_id) ...@@ -266,10 +258,12 @@ int Abstract_instance_cmd::execute(st_net *net, ulong connection_id)
return ER_BAD_INSTANCE_NAME; return ER_BAD_INSTANCE_NAME;
} }
instance->lock();
instance_map->unlock();
err_code= execute_impl(net, instance); err_code= execute_impl(net, instance);
}
instance_map->unlock(); instance->unlock();
if (!err_code) if (!err_code)
err_code= send_ok_response(net, connection_id); err_code= send_ok_response(net, connection_id);
...@@ -288,7 +282,7 @@ Show_instance_status::Show_instance_status(const LEX_STRING *instance_name_arg) ...@@ -288,7 +282,7 @@ Show_instance_status::Show_instance_status(const LEX_STRING *instance_name_arg)
} }
/* /**
Implementation of SHOW INSTANCE STATUS statement. Implementation of SHOW INSTANCE STATUS statement.
Possible error codes: Possible error codes:
...@@ -363,19 +357,14 @@ int Show_instance_status::write_data(st_net *net, Instance *instance) ...@@ -363,19 +357,14 @@ int Show_instance_status::write_data(st_net *net, Instance *instance)
char version_num_buf[MAX_VERSION_LENGTH]; char version_num_buf[MAX_VERSION_LENGTH];
uint pos= 0; uint pos= 0;
const char *state_name; const char *state_name= instance->get_state_name();
const char *version_tag= "unknown"; const char *version_tag= "unknown";
const char *version_num= "unknown"; const char *version_num= "unknown";
const char *mysqld_compatible_status; const char *mysqld_compatible_status=
instance->is_mysqld_compatible() ? "yes" : "no";
instance_map->guardian->lock();
state_name= instance_map->get_instance_state_name(instance);
mysqld_compatible_status= instance->is_mysqld_compatible() ? "yes" : "no";
instance_map->guardian->unlock();
if (instance->options.mysqld_version) if (instance->options.mysqld_version)
{ {
if (parse_version_number(instance->options.mysqld_version, version_num_buf, if (parse_version_number(instance->options.mysqld_version, version_num_buf,
sizeof(version_num_buf))) sizeof(version_num_buf)))
return ER_OUT_OF_RESOURCES; return ER_OUT_OF_RESOURCES;
...@@ -409,7 +398,7 @@ Show_instance_options::Show_instance_options( ...@@ -409,7 +398,7 @@ Show_instance_options::Show_instance_options(
} }
/* /**
Implementation of SHOW INSTANCE OPTIONS statement. Implementation of SHOW INSTANCE OPTIONS statement.
Possible error codes: Possible error codes:
...@@ -505,23 +494,33 @@ Start_instance::Start_instance(const LEX_STRING *instance_name_arg) ...@@ -505,23 +494,33 @@ Start_instance::Start_instance(const LEX_STRING *instance_name_arg)
} }
/* /**
Implementation of START INSTANCE statement. Implementation of START INSTANCE statement.
Possible error codes: Possible error codes:
ER_BAD_INSTANCE_NAME The instance with the given name does not exist ER_BAD_INSTANCE_NAME The instance with the given name does not exist
ER_OUT_OF_RESOURCES Not enough resources to complete the operation ER_INSTANCE_MISCONFIGURED The instance configuration is invalid
ER_INSTANCE_ALREADY_STARTED The instance is already started
ER_CANNOT_START_INSTANCE The instance could not have been started
TODO: as soon as this method operates only with Instance, we probably
should introduce a new method (execute_start_instance()) in Instance and
just call it from here.
*/ */
int Start_instance::execute_impl(st_net * /* net */, Instance *instance) int Start_instance::execute_impl(st_net * /* net */, Instance *instance)
{ {
int err_code; if (!instance->is_configured())
return ER_INSTANCE_MISCONFIGURED;
if ((err_code= instance->start())) if (instance->is_active())
return err_code; return ER_INSTANCE_ALREADY_STARTED;
if (instance->start_mysqld())
return ER_CANNOT_START_INSTANCE;
if (!(instance->options.nonguarded)) instance->reset_stat();
instance_map->guardian->guard(instance); instance->set_state(Instance::NOT_STARTED);
return 0; return 0;
} }
...@@ -546,25 +545,26 @@ Stop_instance::Stop_instance(const LEX_STRING *instance_name_arg) ...@@ -546,25 +545,26 @@ Stop_instance::Stop_instance(const LEX_STRING *instance_name_arg)
} }
/* /**
Implementation of STOP INSTANCE statement. Implementation of STOP INSTANCE statement.
Possible error codes: Possible error codes:
ER_BAD_INSTANCE_NAME The instance with the given name does not exist ER_BAD_INSTANCE_NAME The instance with the given name does not exist
ER_OUT_OF_RESOURCES Not enough resources to complete the operation ER_OUT_OF_RESOURCES Not enough resources to complete the operation
TODO: as soon as this method operates only with Instance, we probably
should introduce a new method (execute_stop_instance()) in Instance and
just call it from here.
*/ */
int Stop_instance::execute_impl(st_net * /* net */, Instance *instance) int Stop_instance::execute_impl(st_net * /* net */, Instance *instance)
{ {
int err_code; if (!instance->is_active())
return ER_INSTANCE_IS_NOT_STARTED;
if (!(instance->options.nonguarded)) instance->set_state(Instance::STOPPED);
instance_map->guardian->stop_guard(instance);
if ((err_code= instance->stop()))
return err_code;
return 0; return instance->stop_mysqld() ? ER_STOP_INSTANCE : 0;
} }
...@@ -582,12 +582,12 @@ int Stop_instance::send_ok_response(st_net *net, ulong connection_id) ...@@ -582,12 +582,12 @@ int Stop_instance::send_ok_response(st_net *net, ulong connection_id)
**************************************************************************/ **************************************************************************/
Create_instance::Create_instance(const LEX_STRING *instance_name_arg) Create_instance::Create_instance(const LEX_STRING *instance_name_arg)
:instance_name(instance_name_arg) :Instance_cmd(instance_name_arg)
{ {
} }
/* /**
This operation initializes Create_instance object. This operation initializes Create_instance object.
SYNOPSIS SYNOPSIS
...@@ -604,7 +604,7 @@ bool Create_instance::init(const char **text) ...@@ -604,7 +604,7 @@ bool Create_instance::init(const char **text)
} }
/* /**
This operation parses CREATE INSTANCE options. This operation parses CREATE INSTANCE options.
SYNOPSIS SYNOPSIS
...@@ -724,7 +724,7 @@ bool Create_instance::parse_args(const char **text) ...@@ -724,7 +724,7 @@ bool Create_instance::parse_args(const char **text)
} }
/* /**
Implementation of CREATE INSTANCE statement. Implementation of CREATE INSTANCE statement.
Possible error codes: Possible error codes:
...@@ -736,6 +736,7 @@ bool Create_instance::parse_args(const char **text) ...@@ -736,6 +736,7 @@ bool Create_instance::parse_args(const char **text)
int Create_instance::execute(st_net *net, ulong connection_id) int Create_instance::execute(st_net *net, ulong connection_id)
{ {
int err_code; int err_code;
Instance *instance;
/* Check that the name is valid and there is no instance with such name. */ /* Check that the name is valid and there is no instance with such name. */
...@@ -761,17 +762,26 @@ int Create_instance::execute(st_net *net, ulong connection_id) ...@@ -761,17 +762,26 @@ int Create_instance::execute(st_net *net, ulong connection_id)
return err_code; return err_code;
} }
instance= instance_map->find(get_instance_name());
DBUG_ASSERT(instance);
if ((err_code= create_instance_in_file(get_instance_name(), &options))) if ((err_code= create_instance_in_file(get_instance_name(), &options)))
{ {
Instance *instance= instance_map->find(get_instance_name());
if (instance)
instance_map->remove_instance(instance); /* instance is deleted here. */ instance_map->remove_instance(instance); /* instance is deleted here. */
instance_map->unlock(); instance_map->unlock();
return err_code; return err_code;
} }
/*
CREATE INSTANCE must not start the instance, even if it is guarded.
TODO: The problem, however, is that if Instance Manager restarts after
creating the instance, the instance will be started (see also BUG#19718).
*/
instance->set_state(Instance::STOPPED);
/* That's all. */ /* That's all. */
instance_map->unlock(); instance_map->unlock();
...@@ -790,12 +800,12 @@ int Create_instance::execute(st_net *net, ulong connection_id) ...@@ -790,12 +800,12 @@ int Create_instance::execute(st_net *net, ulong connection_id)
**************************************************************************/ **************************************************************************/
Drop_instance::Drop_instance(const LEX_STRING *instance_name_arg) Drop_instance::Drop_instance(const LEX_STRING *instance_name_arg)
:Abstract_instance_cmd(instance_name_arg) :Instance_cmd(instance_name_arg)
{ {
} }
/* /**
Implementation of DROP INSTANCE statement. Implementation of DROP INSTANCE statement.
Possible error codes: Possible error codes:
...@@ -804,14 +814,38 @@ Drop_instance::Drop_instance(const LEX_STRING *instance_name_arg) ...@@ -804,14 +814,38 @@ Drop_instance::Drop_instance(const LEX_STRING *instance_name_arg)
ER_OUT_OF_RESOURCES Not enough resources to complete the operation ER_OUT_OF_RESOURCES Not enough resources to complete the operation
*/ */
int Drop_instance::execute_impl(st_net * /* net */, Instance *instance) int Drop_instance::execute(st_net *net, ulong connection_id)
{ {
int err_code; int err_code;
Instance *instance;
/* Lock Instance_map first; the instance itself is locked once it is found. */
instance_map->lock();
/* Find an instance. */
instance= instance_map->find(get_instance_name());
if (!instance)
{
instance_map->unlock();
return ER_BAD_INSTANCE_NAME;
}
instance->lock();
/* Check that the instance is offline. */ /* Check that the instance is offline. */
if (instance_map->guardian->is_active(instance)) if (instance->is_active())
{
instance->unlock();
instance_map->unlock();
return ER_DROP_ACTIVE_INSTANCE; return ER_DROP_ACTIVE_INSTANCE;
}
/* Try to remove instance from the file. */
err_code= modify_defaults_file(Options::Main::config_file, NULL, NULL, err_code= modify_defaults_file(Options::Main::config_file, NULL, NULL,
get_instance_name()->str, MY_REMOVE_SECTION); get_instance_name()->str, MY_REMOVE_SECTION);
...@@ -824,27 +858,30 @@ int Drop_instance::execute_impl(st_net * /* net */, Instance *instance) ...@@ -824,27 +858,30 @@ int Drop_instance::execute_impl(st_net * /* net */, Instance *instance)
(const char *) get_instance_name()->str, (const char *) get_instance_name()->str,
(const char *) Options::Main::config_file, (const char *) Options::Main::config_file,
(int) err_code); (int) err_code);
}
if (err_code) instance->unlock();
instance_map->unlock();
return modify_defaults_to_im_error[err_code]; return modify_defaults_to_im_error[err_code];
}
/* Remove instance from the instance map hash and Guardian's list. */ /* Unlock the instance before destroy. */
if (!instance->options.nonguarded) instance->unlock();
instance_map->guardian->stop_guard(instance);
if ((err_code= instance->stop())) /*
return err_code; Remove instance from the instance map
(the instance will be also destroyed here).
*/
instance_map->remove_instance(instance); instance_map->remove_instance(instance);
return 0; /* Unlock the instance map. */
}
instance_map->unlock();
/* That's all: send ok. */
int Drop_instance::send_ok_response(st_net *net, ulong connection_id)
{
if (net_send_ok(net, connection_id, "Instance dropped")) if (net_send_ok(net, connection_id, "Instance dropped"))
return ER_OUT_OF_RESOURCES; return ER_OUT_OF_RESOURCES;
...@@ -867,7 +904,7 @@ Show_instance_log::Show_instance_log(const LEX_STRING *instance_name_arg, ...@@ -867,7 +904,7 @@ Show_instance_log::Show_instance_log(const LEX_STRING *instance_name_arg,
} }
/* /**
Implementation of SHOW INSTANCE LOG statement. Implementation of SHOW INSTANCE LOG statement.
Possible error codes: Possible error codes:
...@@ -1012,7 +1049,7 @@ Show_instance_log_files::Show_instance_log_files ...@@ -1012,7 +1049,7 @@ Show_instance_log_files::Show_instance_log_files
} }
/* /**
Implementation of SHOW INSTANCE LOG FILES statement. Implementation of SHOW INSTANCE LOG FILES statement.
Possible error codes: Possible error codes:
...@@ -1133,7 +1170,7 @@ int Show_instance_log_files::write_data(st_net *net, Instance *instance) ...@@ -1133,7 +1170,7 @@ int Show_instance_log_files::write_data(st_net *net, Instance *instance)
Implementation of Abstract_option_cmd. Implementation of Abstract_option_cmd.
**************************************************************************/ **************************************************************************/
/* /**
Instance_options_list -- a data class representing a list of options for Instance_options_list -- a data class representing a list of options for
some instance. some instance.
*/ */
...@@ -1251,7 +1288,7 @@ bool Abstract_option_cmd::init(const char **text) ...@@ -1251,7 +1288,7 @@ bool Abstract_option_cmd::init(const char **text)
} }
/* /**
Correct the option file. The "skip" option is used to remove the found Correct the option file. The "skip" option is used to remove the found
option. option.
...@@ -1290,8 +1327,8 @@ int Abstract_option_cmd::correct_file(Instance *instance, Named_value *option, ...@@ -1290,8 +1327,8 @@ int Abstract_option_cmd::correct_file(Instance *instance, Named_value *option,
} }
/* /**
Implementation of SET statement. Lock Instance Map and call execute_impl().
Possible error codes: Possible error codes:
ER_BAD_INSTANCE_NAME The instance with the given name does not exist ER_BAD_INSTANCE_NAME The instance with the given name does not exist
...@@ -1341,6 +1378,11 @@ Abstract_option_cmd::get_instance_options_list(const LEX_STRING *instance_name) ...@@ -1341,6 +1378,11 @@ Abstract_option_cmd::get_instance_options_list(const LEX_STRING *instance_name)
} }
/**
Skeleton implementation of option-management command.
MT-NOTE: Instance Map is locked before calling this operation.
*/
int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
{ {
int err_code= 0; int err_code= 0;
...@@ -1352,12 +1394,18 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) ...@@ -1352,12 +1394,18 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
Instance_options_list *lst= Instance_options_list *lst=
(Instance_options_list *) hash_element(&instance_options_map, i); (Instance_options_list *) hash_element(&instance_options_map, i);
bool instance_is_active;
lst->instance= instance_map->find(lst->get_instance_name()); lst->instance= instance_map->find(lst->get_instance_name());
if (!lst->instance) if (!lst->instance)
return ER_BAD_INSTANCE_NAME; return ER_BAD_INSTANCE_NAME;
if (instance_map->guardian->is_active(lst->instance)) lst->instance->lock();
instance_is_active= lst->instance->is_active();
lst->instance->unlock();
if (instance_is_active)
return ER_INSTANCE_IS_ACTIVE; return ER_INSTANCE_IS_ACTIVE;
} }
...@@ -1368,6 +1416,8 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) ...@@ -1368,6 +1416,8 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
Instance_options_list *lst= Instance_options_list *lst=
(Instance_options_list *) hash_element(&instance_options_map, i); (Instance_options_list *) hash_element(&instance_options_map, i);
lst->instance->lock();
for (int j= 0; j < lst->options.get_size(); ++j) for (int j= 0; j < lst->options.get_size(); ++j)
{ {
Named_value option= lst->options.get_element(j); Named_value option= lst->options.get_element(j);
...@@ -1377,6 +1427,8 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) ...@@ -1377,6 +1427,8 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
break; break;
} }
lst->instance->unlock();
if (err_code) if (err_code)
break; break;
} }
...@@ -1392,7 +1444,7 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) ...@@ -1392,7 +1444,7 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
Implementation of Set_option. Implementation of Set_option.
**************************************************************************/ **************************************************************************/
/* /**
This operation parses SET options. This operation parses SET options.
SYNOPSIS SYNOPSIS
...@@ -1566,7 +1618,7 @@ int Set_option::process_option(Instance *instance, Named_value *option) ...@@ -1566,7 +1618,7 @@ int Set_option::process_option(Instance *instance, Named_value *option)
Implementation of Unset_option. Implementation of Unset_option.
**************************************************************************/ **************************************************************************/
/* /**
This operation parses UNSET options. This operation parses UNSET options.
SYNOPSIS SYNOPSIS
...@@ -1662,7 +1714,7 @@ bool Unset_option::parse_args(const char **text) ...@@ -1662,7 +1714,7 @@ bool Unset_option::parse_args(const char **text)
} }
/* /**
Implementation of UNSET statement. Implementation of UNSET statement.
Possible error codes: Possible error codes:
......
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
#endif #endif
/* /**
Print all instances of this instance manager. Print all instances of this instance manager.
Grammar: SHOW INSTANCES Grammar: SHOW INSTANCES
*/ */
...@@ -50,7 +50,7 @@ private: ...@@ -50,7 +50,7 @@ private:
}; };
/* /**
Reread configuration file and refresh internal cache. Reread configuration file and refresh internal cache.
Grammar: FLUSH INSTANCES Grammar: FLUSH INSTANCES
*/ */
...@@ -66,11 +66,50 @@ public: ...@@ -66,11 +66,50 @@ public:
}; };
/* /**
Base class for Instance-specific commands
(commands that operate on one instance).
Instance_cmd extends Command class by:
- an attribute for storing instance name;
- code to initialize instance name in constructor;
- an accessor to get instance name.
*/
class Instance_cmd : public Command
{
public:
/* Initializes the stored instance name from instance_name_arg. */
Instance_cmd(const LEX_STRING *instance_name_arg);
protected:
/* Accessor for derived commands: returns instance_name.get_str(). */
inline const LEX_STRING *get_instance_name() const
{
return instance_name.get_str();
}
private:
/* Name of the instance this command operates on. */
Instance_name instance_name;
};
/**
Abstract class for Instance-specific commands. Abstract class for Instance-specific commands.
Abstract_instance_cmd extends Instance_cmd by providing a common
framework for writing command-implementations. Basically, the class
implements Command::execute() pure virtual function in the following
way:
- Lock Instance_map;
- Get an instance by name. Return an error, if there is no such
instance;
- Lock the instance;
- Unlock Instance_map;
- Call execute_impl(), which should be implemented in derived class;
- Unlock the instance;
- Send response to the client and return error status.
*/ */
class Abstract_instance_cmd: public Command class Abstract_instance_cmd: public Instance_cmd
{ {
public: public:
Abstract_instance_cmd(const LEX_STRING *instance_name_arg); Abstract_instance_cmd(const LEX_STRING *instance_name_arg);
...@@ -79,29 +118,24 @@ public: ...@@ -79,29 +118,24 @@ public:
virtual int execute(st_net *net, ulong connection_id); virtual int execute(st_net *net, ulong connection_id);
protected: protected:
/* MT-NOTE: this operation is called under acquired Instance_map's lock. */ /**
This operation is intended to contain command-specific implementation.
MT-NOTE: this operation is called under acquired Instance's lock.
*/
virtual int execute_impl(st_net *net, Instance *instance) = 0; virtual int execute_impl(st_net *net, Instance *instance) = 0;
/* /**
This operation is invoked on successful return of execute_impl() and is This operation is invoked on successful return of execute_impl() and is
intended to send closing data. intended to send closing data.
MT-NOTE: this operation is called under released Instance_map's lock. MT-NOTE: this operation is called under released Instance's lock.
*/ */
virtual int send_ok_response(st_net *net, ulong connection_id) = 0; virtual int send_ok_response(st_net *net, ulong connection_id) = 0;
protected:
inline const LEX_STRING *get_instance_name() const
{
return instance_name.get_str();
}
private:
Instance_name instance_name;
}; };
/* /**
Print status of an instance. Print status of an instance.
Grammar: SHOW INSTANCE STATUS <instance_name> Grammar: SHOW INSTANCE STATUS <instance_name>
*/ */
...@@ -121,7 +155,7 @@ private: ...@@ -121,7 +155,7 @@ private:
}; };
/* /**
Print options of chosen instance. Print options of chosen instance.
Grammar: SHOW INSTANCE OPTIONS <instance_name> Grammar: SHOW INSTANCE OPTIONS <instance_name>
*/ */
...@@ -141,7 +175,7 @@ private: ...@@ -141,7 +175,7 @@ private:
}; };
/* /**
Start an instance. Start an instance.
Grammar: START INSTANCE <instance_name> Grammar: START INSTANCE <instance_name>
*/ */
...@@ -157,7 +191,7 @@ protected: ...@@ -157,7 +191,7 @@ protected:
}; };
/* /**
Stop an instance. Stop an instance.
Grammar: STOP INSTANCE <instance_name> Grammar: STOP INSTANCE <instance_name>
*/ */
...@@ -173,12 +207,12 @@ protected: ...@@ -173,12 +207,12 @@ protected:
}; };
/* /**
Create an instance. Create an instance.
Grammar: CREATE INSTANCE <instance_name> [<options>] Grammar: CREATE INSTANCE <instance_name> [<options>]
*/ */
class Create_instance: public Command class Create_instance: public Instance_cmd
{ {
public: public:
Create_instance(const LEX_STRING *instance_name_arg); Create_instance(const LEX_STRING *instance_name_arg);
...@@ -189,22 +223,15 @@ public: ...@@ -189,22 +223,15 @@ public:
protected: protected:
virtual int execute(st_net *net, ulong connection_id); virtual int execute(st_net *net, ulong connection_id);
inline const LEX_STRING *get_instance_name() const
{
return instance_name.get_str();
}
private: private:
bool parse_args(const char **text); bool parse_args(const char **text);
private: private:
Instance_name instance_name;
Named_value_arr options; Named_value_arr options;
}; };
/* /**
Drop an instance. Drop an instance.
Grammar: DROP INSTANCE <instance_name> Grammar: DROP INSTANCE <instance_name>
...@@ -213,18 +240,17 @@ private: ...@@ -213,18 +240,17 @@ private:
is removed from the instance map. is removed from the instance map.
*/ */
class Drop_instance: public Abstract_instance_cmd class Drop_instance: public Instance_cmd
{ {
public: public:
Drop_instance(const LEX_STRING *instance_name_arg); Drop_instance(const LEX_STRING *instance_name_arg);
protected: protected:
virtual int execute_impl(st_net *net, Instance *instance); virtual int execute(st_net *net, ulong connection_id);
virtual int send_ok_response(st_net *net, ulong connection_id);
}; };
/* /**
Print requested part of the log. Print requested part of the log.
Grammar: Grammar:
SHOW <instance_name> LOG {ERROR | SLOW | GENERAL} size[, offset_from_end] SHOW <instance_name> LOG {ERROR | SLOW | GENERAL} size[, offset_from_end]
...@@ -252,7 +278,7 @@ private: ...@@ -252,7 +278,7 @@ private:
}; };
/* /**
Shows the list of the log files, used by an instance. Shows the list of the log files, used by an instance.
Grammar: SHOW <instance_name> LOG FILES Grammar: SHOW <instance_name> LOG FILES
*/ */
...@@ -272,7 +298,7 @@ private: ...@@ -272,7 +298,7 @@ private:
}; };
/* /**
Abstract class for option-management commands. Abstract class for option-management commands.
*/ */
...@@ -312,7 +338,7 @@ private: ...@@ -312,7 +338,7 @@ private:
}; };
/* /**
Set an option for the instance. Set an option for the instance.
Grammar: SET instance_name.option[=option_value][, ...] Grammar: SET instance_name.option[=option_value][, ...]
*/ */
...@@ -329,7 +355,7 @@ protected: ...@@ -329,7 +355,7 @@ protected:
}; };
/* /**
Remove option of the instance. Remove option of the instance.
Grammar: UNSET instance_name.option[, ...] Grammar: UNSET instance_name.option[, ...]
*/ */
...@@ -346,7 +372,7 @@ protected: ...@@ -346,7 +372,7 @@ protected:
}; };
/* /**
Syntax error command. Syntax error command.
This command is issued if parser reported a syntax error. We need it to This command is issued if parser reported a syntax error. We need it to
......
...@@ -28,101 +28,126 @@ ...@@ -28,101 +28,126 @@
#include "instance_map.h" #include "instance_map.h"
#include "log.h" #include "log.h"
#include "mysql_manager_error.h" #include "mysql_manager_error.h"
#include "options.h"
const char *
Guardian::get_instance_state_name(enum_instance_state state)
{
switch (state) {
case NOT_STARTED:
return "offline";
case STARTING:
return "starting";
case STARTED:
return "online";
case JUST_CRASHED: /*************************************************************************
return "failed"; {{{ Constructor & destructor.
*************************************************************************/
case CRASHED: /**
return "crashed"; Guardian constructor.
case CRASHED_AND_ABANDONED:
return "abandoned";
case STOPPING:
return "stopping";
}
return NULL; /* just to ignore compiler warning. */ SYNOPSIS
} Guardian()
thread_registry_arg
instance_map_arg
/* {{{ Constructor & destructor. */ DESCRIPTION
Nominal constructor intended for assigning references and initializing
trivial objects. Real initialization is made by init() method.
*/
Guardian::Guardian(Thread_registry *thread_registry_arg, Guardian::Guardian(Thread_registry *thread_registry_arg,
Instance_map *instance_map_arg, Instance_map *instance_map_arg)
uint monitoring_interval_arg) :shutdown_requested(FALSE),
:stopped(FALSE), stopped(FALSE),
monitoring_interval(monitoring_interval_arg),
thread_registry(thread_registry_arg), thread_registry(thread_registry_arg),
instance_map(instance_map_arg), instance_map(instance_map_arg)
shutdown_requested(FALSE)
{ {
pthread_mutex_init(&LOCK_guardian, 0); pthread_mutex_init(&LOCK_guardian, 0);
pthread_cond_init(&COND_guardian, 0); pthread_cond_init(&COND_guardian, 0);
init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0);
} }
Guardian::~Guardian() Guardian::~Guardian()
{ {
/* delay guardian destruction to the moment when no one needs it */ /*
pthread_mutex_lock(&LOCK_guardian); NOTE: it's necessary to synchronize here, because Guardian thread can be
free_root(&alloc, MYF(0)); still alive and hold the mutex (because it is detached and we have no
pthread_mutex_unlock(&LOCK_guardian); control over it).
*/
lock();
unlock();
pthread_mutex_destroy(&LOCK_guardian); pthread_mutex_destroy(&LOCK_guardian);
pthread_cond_destroy(&COND_guardian); pthread_cond_destroy(&COND_guardian);
} }
/* }}} */ /*************************************************************************
}}}
*************************************************************************/
/**
Send request to stop Guardian.
SYNOPSIS
request_shutdown()
*/
void Guardian::request_shutdown() void Guardian::request_shutdown()
{ {
pthread_mutex_lock(&LOCK_guardian);
/* STOP Instances or just clean up Guardian repository */
stop_instances(); stop_instances();
lock();
shutdown_requested= TRUE; shutdown_requested= TRUE;
pthread_mutex_unlock(&LOCK_guardian); unlock();
ping();
} }
void Guardian::process_instance(Instance *instance, /**
GUARD_NODE *current_node, Process an instance.
LIST **guarded_instances,
LIST *node) SYNOPSIS
process_instance()
instance a pointer to the instance for processing
MT-NOTE:
- the given instance must be locked before calling this operation;
- Guardian must be locked before calling this operation.
*/
void Guardian::process_instance(Instance *instance)
{ {
uint waitchild= (uint) Instance::DEFAULT_SHUTDOWN_DELAY;
/* The amount of times, Guardian attempts to restart an instance */
int restart_retry= 100; int restart_retry= 100;
time_t current_time= time(NULL); time_t current_time= time(NULL);
if (current_node->state == STOPPING) if (instance->get_state() == Instance::STOPPING)
{ {
waitchild= instance->options.get_shutdown_delay(); /* This branch is executed during shutdown. */
/* this returns TRUE if and only if an instance was stopped for sure */ /* This returns TRUE if and only if an instance was stopped for sure. */
if (instance->is_crashed()) if (instance->is_crashed())
*guarded_instances= list_delete(*guarded_instances, node);
else if ( (uint) (current_time - current_node->last_checked) > waitchild)
{ {
log_info("Guardian: '%s' stopped.",
(const char *) instance->get_name()->str);
instance->set_state(Instance::STOPPED);
}
else if ((uint) (current_time - instance->last_checked) >=
instance->options.get_shutdown_delay())
{
log_info("Guardian: '%s' hasn't stopped within %d secs.",
(const char *) instance->get_name()->str,
(int) instance->options.get_shutdown_delay());
instance->kill_mysqld(SIGKILL); instance->kill_mysqld(SIGKILL);
/*
Later we do node= node->next. This is ok, as we are only removing log_info("Guardian: pretend that '%s' is killed.",
the node from the list. The pointer to the next one is still valid. (const char *) instance->get_name()->str);
*/
*guarded_instances= list_delete(*guarded_instances, node); instance->set_state(Instance::STOPPED);
}
else
{
log_info("Guardian: waiting for '%s' to stop (%d secs left).",
(const char *) instance->get_name()->str,
(int) (instance->options.get_shutdown_delay() -
current_time + instance->last_checked));
} }
return; return;
...@@ -133,68 +158,72 @@ void Guardian::process_instance(Instance *instance, ...@@ -133,68 +158,72 @@ void Guardian::process_instance(Instance *instance,
/* The instance can be contacted on its port */ /* The instance can be contacted on its port */
/* If STARTING also check that pidfile has been created */ /* If STARTING also check that pidfile has been created */
if (current_node->state == STARTING && if (instance->get_state() == Instance::STARTING &&
current_node->instance->options.load_pid() == 0) instance->options.load_pid() == 0)
{ {
/* Pid file not created yet, don't go to STARTED state yet */ /* Pid file not created yet, don't go to STARTED state yet */
} }
else if (current_node->state != STARTED) else if (instance->get_state() != Instance::STARTED)
{ {
/* clear status fields */ /* clear status fields */
log_info("Guardian: '%s' is running, set state to STARTED.", log_info("Guardian: '%s' is running, set state to STARTED.",
(const char *) instance->options.instance_name.str); (const char *) instance->options.instance_name.str);
current_node->restart_counter= 0; instance->reset_stat();
current_node->crash_moment= 0; instance->set_state(Instance::STARTED);
current_node->state= STARTED;
} }
} }
else else
{ {
switch (current_node->state) { switch (instance->get_state()) {
case NOT_STARTED: case Instance::NOT_STARTED:
log_info("Guardian: starting '%s'...", log_info("Guardian: starting '%s'...",
(const char *) instance->options.instance_name.str); (const char *) instance->options.instance_name.str);
/* NOTE, set state to STARTING _before_ start() is called */ /* NOTE: set state to STARTING _before_ start() is called. */
current_node->state= STARTING; instance->set_state(Instance::STARTING);
instance->start(); instance->last_checked= current_time;
current_node->last_checked= current_time;
break; instance->start_mysqld();
case STARTED: /* fallthrough */
case STARTING: /* let the instance start or crash */ return;
if (instance->is_crashed())
{ case Instance::STARTED: /* fallthrough */
current_node->crash_moment= current_time; case Instance::STARTING: /* let the instance start or crash */
current_node->last_checked= current_time; if (!instance->is_crashed())
current_node->state= JUST_CRASHED; return;
instance->crash_moment= current_time;
instance->last_checked= current_time;
instance->set_state(Instance::JUST_CRASHED);
/* fallthrough -- restart an instance immediately */ /* fallthrough -- restart an instance immediately */
}
else case Instance::JUST_CRASHED:
break; if (current_time - instance->crash_moment <= 2)
case JUST_CRASHED:
if (current_time - current_node->crash_moment <= 2)
{ {
if (instance->is_crashed()) if (instance->is_crashed())
{ {
instance->start(); instance->start_mysqld();
log_info("Guardian: starting '%s'...", log_info("Guardian: starting '%s'...",
(const char *) instance->options.instance_name.str); (const char *) instance->options.instance_name.str);
} }
} }
else else
current_node->state= CRASHED; instance->set_state(Instance::CRASHED);
break;
case CRASHED: /* just regular restarts */ return;
if (current_time - current_node->last_checked >
monitoring_interval) case Instance::CRASHED: /* just regular restarts */
{ if (current_time - instance->last_checked <=
if ((current_node->restart_counter < restart_retry)) Options::Main::monitoring_interval)
return;
if (instance->restart_counter < restart_retry)
{ {
if (instance->is_crashed()) if (instance->is_crashed())
{ {
instance->start(); instance->start_mysqld();
current_node->last_checked= current_time; instance->last_checked= current_time;
current_node->restart_counter++;
log_info("Guardian: restarting '%s'...", log_info("Guardian: restarting '%s'...",
(const char *) instance->options.instance_name.str); (const char *) instance->options.instance_name.str);
} }
...@@ -204,12 +233,15 @@ void Guardian::process_instance(Instance *instance, ...@@ -204,12 +233,15 @@ void Guardian::process_instance(Instance *instance,
log_info("Guardian: can not start '%s'. " log_info("Guardian: can not start '%s'. "
"Abandoning attempts to (re)start it", "Abandoning attempts to (re)start it",
(const char *) instance->options.instance_name.str); (const char *) instance->options.instance_name.str);
current_node->state= CRASHED_AND_ABANDONED;
} instance->set_state(Instance::CRASHED_AND_ABANDONED);
} }
break;
case CRASHED_AND_ABANDONED: return;
break; /* do nothing */
case Instance::CRASHED_AND_ABANDONED:
return; /* do nothing */
default: default:
DBUG_ASSERT(0); DBUG_ASSERT(0);
} }
...@@ -217,56 +249,78 @@ void Guardian::process_instance(Instance *instance, ...@@ -217,56 +249,78 @@ void Guardian::process_instance(Instance *instance,
} }
/* /**
Main function of Guardian thread. Main function of Guardian thread.
SYNOPSIS SYNOPSIS
run() run()
DESCRIPTION DESCRIPTION
Check for all guarded instances and restart them if needed. If everything Check for all guarded instances and restart them if needed.
is fine go and sleep for some time.
*/ */
void Guardian::run() void Guardian::run()
{ {
Instance *instance;
LIST *node;
struct timespec timeout; struct timespec timeout;
log_info("Guardian: started."); log_info("Guardian: started.");
thread_registry->register_thread(&thread_info); thread_registry->register_thread(&thread_info);
pthread_mutex_lock(&LOCK_guardian); /* Loop, until all instances were shut down at the end. */
while (true)
{
Instance_map::Iterator instances_it(instance_map);
Instance *instance;
bool all_instances_stopped= TRUE;
instance_map->lock();
/* loop, until all instances were shut down at the end */ while ((instance= instances_it.next()))
while (!(shutdown_requested && (guarded_instances == NULL)))
{ {
node= guarded_instances; instance->lock();
while (node != NULL) if (!instance->is_guarded() ||
instance->get_state() == Instance::STOPPED)
{ {
GUARD_NODE *current_node= (GUARD_NODE *) node->data; instance->unlock();
instance= ((GUARD_NODE *) node->data)->instance; continue;
process_instance(instance, current_node, &guarded_instances, node); }
process_instance(instance);
node= node->next; if (instance->get_state() != Instance::STOPPED)
all_instances_stopped= FALSE;
instance->unlock();
}
instance_map->unlock();
lock();
if (shutdown_requested && all_instances_stopped)
{
log_info("Guardian: all guarded mysqlds stopped.");
stopped= TRUE;
unlock();
break;
} }
timeout.tv_sec= time(NULL) + monitoring_interval;
timeout.tv_sec= time(NULL) + Options::Main::monitoring_interval;
timeout.tv_nsec= 0; timeout.tv_nsec= 0;
/* check the loop predicate before sleeping */
if (!(shutdown_requested && (!(guarded_instances))))
thread_registry->cond_timedwait(&thread_info, &COND_guardian, thread_registry->cond_timedwait(&thread_info, &COND_guardian,
&LOCK_guardian, &timeout); &LOCK_guardian, &timeout);
unlock();
} }
log_info("Guardian: stopped."); log_info("Guardian: stopped.");
stopped= TRUE; /* Now, when the Guardian is stopped we can stop the IM. */
pthread_mutex_unlock(&LOCK_guardian);
/* now, when the Guardian is stopped we can stop the IM */
thread_registry->unregister_thread(&thread_info); thread_registry->unregister_thread(&thread_info);
thread_registry->request_shutdown(); thread_registry->request_shutdown();
...@@ -274,129 +328,65 @@ void Guardian::run() ...@@ -274,129 +328,65 @@ void Guardian::run()
} }
int Guardian::is_stopped() /**
Return the value of stopped flag.
*/
bool Guardian::is_stopped()
{ {
int var; int var;
pthread_mutex_lock(&LOCK_guardian);
lock();
var= stopped; var= stopped;
pthread_mutex_unlock(&LOCK_guardian); unlock();
return var; return var;
} }
/* /**
Initialize the list of guarded instances: loop through the Instance_map and Wake up Guardian thread.
add all of the instances, which don't have 'nonguarded' option specified.
SYNOPSIS
Guardian::init()
NOTE: The operation should be invoked with the following locks acquired:
- Guardian;
- Instance_map;
RETURN MT-NOTE: though usually the mutex associated with condition variable should
0 - ok be acquired before signalling the variable, here this is not needed.
1 - error occurred Signalling under locked mutex is used to avoid lost signals. In the current
logic however locking mutex does not guarantee that the signal will not be
lost.
*/ */
int Guardian::init() void Guardian::ping()
{ {
Instance *instance; pthread_cond_signal(&COND_guardian);
Instance_map::Iterator iterator(instance_map);
/* clear the list of guarded instances */
free_root(&alloc, MYF(0));
init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0);
guarded_instances= NULL;
while ((instance= iterator.next()))
{
if (instance->options.nonguarded)
continue;
if (guard(instance, TRUE)) /* do not lock guardian */
return 1;
}
return 0;
} }
/* /**
Add instance to the Guardian list Prepare list of instances.
SYNOPSIS SYNOPSIS
guard() init()
instance the instance to be guarded
nolock whether we prefer do not lock Guardian here,
but use external locking instead
DESCRIPTION
The instance is added to the guarded instances list. Usually guard() is
called after we start an instance.
RETURN MT-NOTE: Instance Map must be locked before calling the operation.
0 - ok
1 - error occurred
*/ */
int Guardian::guard(Instance *instance, bool nolock) void Guardian::init()
{ {
LIST *node; Instance *instance;
GUARD_NODE *content; Instance_map::Iterator iterator(instance_map);
node= (LIST *) alloc_root(&alloc, sizeof(LIST));
content= (GUARD_NODE *) alloc_root(&alloc, sizeof(GUARD_NODE));
if ((!(node)) || (!(content)))
return 1;
/* we store the pointers to instances from the instance_map's MEM_ROOT */
content->instance= instance;
content->restart_counter= 0;
content->crash_moment= 0;
content->state= NOT_STARTED;
node->data= (void*) content;
if (nolock)
guarded_instances= list_add(guarded_instances, node);
else
{
pthread_mutex_lock(&LOCK_guardian);
guarded_instances= list_add(guarded_instances, node);
pthread_mutex_unlock(&LOCK_guardian);
}
return 0;
}
/*
TODO: perhaps it would make sense to create a pool of the LIST nodeents
and give them upon request. Now we are loosing a bit of memory when
guarded instance was stopped and then restarted (since we cannot free just
a piece of the MEM_ROOT).
*/
int Guardian::stop_guard(Instance *instance)
{
LIST *node;
pthread_mutex_lock(&LOCK_guardian);
node= find_instance_node(instance);
if (node != NULL) while ((instance= iterator.next()))
guarded_instances= list_delete(guarded_instances, node); {
instance->lock();
pthread_mutex_unlock(&LOCK_guardian); instance->reset_stat();
instance->set_state(Instance::NOT_STARTED);
/* if there is nothing to delete it is also fine */ instance->unlock();
return 0; }
} }
/*
/**
An internal method which is called at shutdown to unregister instances and An internal method which is called at shutdown to unregister instances and
attempt to stop them if requested. attempt to stop them if requested.
...@@ -409,86 +399,71 @@ int Guardian::stop_guard(Instance *instance) ...@@ -409,86 +399,71 @@ int Guardian::stop_guard(Instance *instance)
accordingly. accordingly.
NOTE NOTE
Guardian object should be locked by the calling function. Guardian object should be locked by the caller.
RETURN
0 - ok
1 - error occurred
*/ */
int Guardian::stop_instances() void Guardian::stop_instances()
{ {
LIST *node; Instance_map::Iterator instances_it(instance_map);
node= guarded_instances; Instance *instance;
while (node != NULL)
instance_map->lock();
while ((instance= instances_it.next()))
{
instance->lock();
if (!instance->is_guarded() ||
instance->get_state() == Instance::STOPPED)
{ {
GUARD_NODE *current_node= (GUARD_NODE *) node->data; instance->unlock();
continue;
}
/* /*
If instance is running or was running (and now probably hanging), If instance is running or was running (and now probably hanging),
request stop. request stop.
*/ */
if (current_node->instance->is_mysqld_running() ||
(current_node->state == STARTED)) if (instance->is_mysqld_running() ||
instance->get_state() == Instance::STARTED)
{ {
current_node->state= STOPPING; instance->set_state(Instance::STOPPING);
current_node->last_checked= time(NULL); instance->last_checked= time(NULL);
} }
else else
/* otherwise remove it from the list */ {
guarded_instances= list_delete(guarded_instances, node); /* Otherwise mark it as STOPPED. */
/* But try to kill it anyway. Just in case */ instance->set_state(Instance::STOPPED);
current_node->instance->kill_mysqld(SIGTERM);
node= node->next;
} }
return 0;
}
void Guardian::lock()
{
pthread_mutex_lock(&LOCK_guardian);
}
void Guardian::unlock() /* Request mysqld to stop. */
{
pthread_mutex_unlock(&LOCK_guardian);
}
LIST *Guardian::find_instance_node(Instance *instance)
{
LIST *node= guarded_instances;
while (node != NULL) instance->kill_mysqld(SIGTERM);
{
/*
We compare only pointers, as we always use pointers from the
instance_map's MEM_ROOT.
*/
if (((GUARD_NODE *) node->data)->instance == instance)
return node;
node= node->next; instance->unlock();
} }
return NULL; instance_map->unlock();
} }
bool Guardian::is_active(Instance *instance) /**
{ Lock Guardian.
bool guarded; */
lock();
guarded= find_instance_node(instance) != NULL; void Guardian::lock()
{
pthread_mutex_lock(&LOCK_guardian);
}
/* is_running() can take a long time, so let's unlock mutex first. */
unlock();
if (guarded) /**
return true; Unlock Guardian.
*/
return instance->is_mysqld_running(); void Guardian::unlock()
{
pthread_mutex_unlock(&LOCK_guardian);
} }
...@@ -17,10 +17,12 @@ ...@@ -17,10 +17,12 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include "thread_registry.h" #include <my_global.h>
#include <my_sys.h> #include <my_sys.h>
#include <my_list.h> #include <my_list.h>
#include "thread_registry.h"
#if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE) #if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE)
#pragma interface #pragma interface
#endif #endif
...@@ -28,7 +30,6 @@ ...@@ -28,7 +30,6 @@
class Instance; class Instance;
class Instance_map; class Instance_map;
class Thread_registry; class Thread_registry;
struct GUARD_NODE;
/** /**
The guardian thread is responsible for monitoring and restarting of guarded The guardian thread is responsible for monitoring and restarting of guarded
...@@ -38,97 +39,73 @@ struct GUARD_NODE; ...@@ -38,97 +39,73 @@ struct GUARD_NODE;
class Guardian: public Thread class Guardian: public Thread
{ {
public: public:
/* states of an instance */ Guardian(Thread_registry *thread_registry_arg,
enum enum_instance_state { NOT_STARTED= 1, STARTING, STARTED, JUST_CRASHED, Instance_map *instance_map_arg);
CRASHED, CRASHED_AND_ABANDONED, STOPPING }; ~Guardian();
/*
The Guardian list node structure. Guardian utilizes it to store
guarded instances plus some additional info.
*/
struct GUARD_NODE void init();
{
Instance *instance;
/* state of an instance (i.e. STARTED, CRASHED, etc.) */
enum_instance_state state;
/* the amount of attemts to restart instance (cleaned up at success) */
int restart_counter;
/* triggered at a crash */
time_t crash_moment;
/* General time field. Used to provide timeouts (at shutdown and restart) */
time_t last_checked;
};
/* Return client state name. */
static const char *get_instance_state_name(enum_instance_state state);
Guardian(Thread_registry *thread_registry_arg, public:
Instance_map *instance_map_arg,
uint monitoring_interval_arg);
virtual ~Guardian();
/* Initialize or refresh the list of guarded instances */
int init();
/* Request guardian shutdown. Stop instances if needed */
void request_shutdown(); void request_shutdown();
/* Start instance protection */
int guard(Instance *instance, bool nolock= FALSE); bool is_stopped();
/* Stop instance protection */
int stop_guard(Instance *instance);
/* Returns TRUE if guardian thread is stopped */
int is_stopped();
void lock(); void lock();
void unlock(); void unlock();
/* void ping();
Return an internal list node for the given instance if the instance is
managed by Guardian. Otherwise, return NULL.
MT-NOTE: must be called under acquired lock. protected:
*/ virtual void run();
LIST *find_instance_node(Instance *instance);
private:
void stop_instances();
/* The operation is used to check if the instance is active or not. */ void process_instance(Instance *instance);
bool is_active(Instance *instance);
private:
/* /*
Return state of the given instance list node. The pointer must specify LOCK_guardian protects the members in this section:
a valid list node. - shutdown_requested;
- stopped;
Also, it is used for COND_guardian.
*/ */
inline enum_instance_state get_instance_state(LIST *instance_node); pthread_mutex_t LOCK_guardian;
protected:
/* Main funtion of the thread */
virtual void run();
public: /*
Guardian's main loop waits on this condition. So, it should be signalled
each time, when instance state has been changed and we want Guardian to
wake up.
TODO: Change this to having data-scoped conditions, i.e. conditions,
which indicate that some data has been changed.
*/
pthread_cond_t COND_guardian; pthread_cond_t COND_guardian;
private: /*
/* Prepares Guardian shutdown. Stops instances is needed */ This variable is set to TRUE, when Manager thread is shutting down.
int stop_instances(); The flag is used by Guardian thread to understand that it's time to
/* check instance state and act accordingly */ finish.
void process_instance(Instance *instance, GUARD_NODE *current_node, */
LIST **guarded_instances, LIST *elem); bool shutdown_requested;
/*
This flag is set to TRUE on shutdown by Guardian thread, when all guarded
mysqlds are stopped.
int stopped; The flag is used in the Manager thread to wait for Guardian to stop all
mysqlds.
*/
bool stopped;
private:
pthread_mutex_t LOCK_guardian;
Thread_info thread_info; Thread_info thread_info;
int monitoring_interval;
Thread_registry *thread_registry; Thread_registry *thread_registry;
Instance_map *instance_map; Instance_map *instance_map;
LIST *guarded_instances;
MEM_ROOT alloc;
/* this variable is set to TRUE when we want to stop Guardian thread */
bool shutdown_requested;
};
inline Guardian::enum_instance_state private:
Guardian::get_instance_state(LIST *instance_node) Guardian(const Guardian &);
{ Guardian&operator =(const Guardian &);
return ((GUARD_NODE *) instance_node->data)->state; };
}
#endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_GUARDIAN_H */ #endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_GUARDIAN_H */
...@@ -36,7 +36,9 @@ ...@@ -36,7 +36,9 @@
#include "thread_registry.h" #include "thread_registry.h"
#include "instance_map.h" #include "instance_map.h"
/* {{{ Platform-specific functions. */ /*************************************************************************
{{{ Platform-specific functions.
*************************************************************************/
#ifndef __WIN__ #ifndef __WIN__
typedef pid_t My_process_info; typedef pid_t My_process_info;
...@@ -44,34 +46,6 @@ typedef pid_t My_process_info; ...@@ -44,34 +46,6 @@ typedef pid_t My_process_info;
typedef PROCESS_INFORMATION My_process_info; typedef PROCESS_INFORMATION My_process_info;
#endif #endif
/*
Proxy thread is a simple way to avoid all pitfalls of the threads
implementation in the OS (e.g. LinuxThreads). With such a thread we
don't have to process SIGCHLD, which is a tricky business if we want
to do it in a portable way.
*/
class Instance_monitor: public Thread
{
public:
Instance_monitor(Instance *instance_arg) :instance(instance_arg) {}
protected:
virtual void run();
void start_and_monitor_instance(Instance_options *old_instance_options,
Instance_map *instance_map,
Thread_registry *thread_registry);
private:
Instance *instance;
};
void Instance_monitor::run()
{
start_and_monitor_instance(&instance->options,
Manager::get_instance_map(),
Manager::get_thread_registry());
delete this;
}
/* /*
Wait for an instance Wait for an instance
...@@ -285,113 +259,149 @@ int kill(pid_t pid, int signum) ...@@ -285,113 +259,149 @@ int kill(pid_t pid, int signum)
} }
#endif #endif
/* }}} */ /*************************************************************************
}}}
*************************************************************************/
/* {{{ Static constants. */
/*************************************************************************
{{{ Static constants.
*************************************************************************/
const LEX_STRING const LEX_STRING
Instance::DFLT_INSTANCE_NAME= { C_STRING_WITH_LEN("mysqld") }; Instance::DFLT_INSTANCE_NAME= { C_STRING_WITH_LEN("mysqld") };
/* }}} */ /*************************************************************************
}}}
*************************************************************************/
/* /*************************************************************************
Fork child, exec an instance and monitor it. {{{ Instance Monitor thread.
*************************************************************************/
SYNOPSIS /**
start_and_monitor_instance() Proxy thread is a simple way to avoid all pitfalls of the threads
old_instance_options Pointer to the options of the instance to be implementation in the OS (e.g. LinuxThreads). With such a thread we
launched. This info is likely to become obsolete don't have to process SIGCHLD, which is a tricky business if we want
when function returns from wait_process() to do it in a portable way.
instance_map Pointer to the instance_map. We use it to protect
the instance from deletion, while we are working
with it.
DESCRIPTION Instance Monitor Thread forks a child process, execs mysqld and waits for
Fork a child, then exec and monitor it. When the child is dead, the child to die.
find appropriate instance (for this purpose we save its name),
set appropriate flags and wake all threads waiting for instance
to stop.
NOTE
A separate thread for starting/monitoring instance is a simple way
to avoid all pitfalls of the threads implementation in the OS (e.g.
LinuxThreads). For one, with such a thread we don't have to process
SIGCHLD, which is a tricky business if we want to do it in a
portable way.
RETURN Instance Monitor assumes that the monitoring instance will not be dropped.
Function returns no value This is guaranteed by having flag monitoring_thread_active and
Instance::is_active() operation.
*/ */
void class Instance_monitor: public Thread
Instance_monitor::
start_and_monitor_instance(Instance_options *old_instance_options,
Instance_map *instance_map,
Thread_registry *thread_registry)
{ {
Instance_name instance_name(&old_instance_options->instance_name); public:
Instance *current_instance; Instance_monitor(Instance *instance_arg) :instance(instance_arg) {}
My_process_info process_info; protected:
Thread_info thread_info; virtual void run();
void start_and_monitor_instance();
private:
Instance *instance;
};
void Instance_monitor::run()
{
start_and_monitor_instance();
delete this;
}
void Instance_monitor::start_and_monitor_instance()
{
Thread_registry *thread_registry= Manager::get_thread_registry();
Guardian *guardian= Manager::get_guardian();
My_process_info mysqld_process_info;
Thread_info monitor_thread_info;
log_info("Instance '%s': Monitor: started.", log_info("Instance '%s': Monitor: started.",
(const char *) instance->get_name()->str); (const char *) instance->get_name()->str);
if (!old_instance_options->nonguarded)
{
/* /*
Register thread in Thread_registry to wait for it to stop on shutdown For guarded instance register the thread in Thread_registry to wait for
only if instance is guarded. If instance is guarded, the thread will not the thread to stop on shutdown (nonguarded instances are not stopped on
finish, because nonguarded instances are not stopped on shutdown. shutdown, so the thread will not finish).
*/ */
thread_registry->register_thread(&thread_info, FALSE);
if (instance->is_guarded())
{
thread_registry->register_thread(&monitor_thread_info, FALSE);
} }
/* /* Starting mysqld. */
Lock instance map to guarantee that no instances are deleted during
strmake() and execv() calls.
*/
instance_map->lock();
/*
Save the instance name in the case if Instance object we
are using is destroyed. (E.g. by "FLUSH INSTANCES")
*/
log_info("Instance '%s': Monitor: starting mysqld...", log_info("Instance '%s': Monitor: starting mysqld...",
(const char *) instance->get_name()->str); (const char *) instance->get_name()->str);
if (start_process(old_instance_options, &process_info)) if (start_process(&instance->options, &mysqld_process_info))
{ {
instance_map->unlock(); instance->lock();
return; /* error is logged */ instance->monitoring_thread_active= FALSE;
instance->unlock();
return;
} }
/* allow users to delete instances */ /* Waiting for mysqld to die. */
instance_map->unlock();
log_info("Instance '%s': Monitor: waiting for mysqld to stop...", log_info("Instance '%s': Monitor: waiting for mysqld to stop...",
(const char *) instance->get_name()->str); (const char *) instance->get_name()->str);
wait_process(&process_info); /* Don't check for return value. */ wait_process(&mysqld_process_info); /* Don't check for return value. */
instance_map->lock(); log_info("Instance '%s': Monitor: mysqld stopped.",
(const char *) instance->get_name()->str);
current_instance= instance_map->find(instance_name.get_str()); /* Update instance status. */
if (current_instance) instance->lock();
current_instance->set_crash_flag_n_wake_all();
instance_map->unlock(); if (instance->is_guarded())
thread_registry->unregister_thread(&monitor_thread_info);
if (!old_instance_options->nonguarded) instance->crashed= TRUE;
thread_registry->unregister_thread(&thread_info); instance->monitoring_thread_active= FALSE;
log_info("Instance '%s': Monitor: finished.", log_info("Instance '%s': Monitor: finished.",
(const char *) instance->get_name()->str); (const char *) instance->get_name()->str);
instance->unlock();
/* Wake up guardian. */
guardian->ping();
} }
/**************************************************************************
}}}
**************************************************************************/
/**************************************************************************
{{{ Static operations.
**************************************************************************/
/**
The operation is intended to check whether string is a well-formed
instance name or not.
SYNOPSIS
is_name_valid()
name string to check
RETURN
TRUE string is a valid instance name
FALSE string is not a valid instance name
TODO: Move to Instance_name class: Instance_name::is_valid().
*/
bool Instance::is_name_valid(const LEX_STRING *name) bool Instance::is_name_valid(const LEX_STRING *name)
{ {
...@@ -405,21 +415,83 @@ bool Instance::is_name_valid(const LEX_STRING *name) ...@@ -405,21 +415,83 @@ bool Instance::is_name_valid(const LEX_STRING *name)
} }
/**
The operation is intended to check if the given instance name is
mysqld-compatible or not.
SYNOPSIS
is_mysqld_compatible_name()
name name to check
RETURN
TRUE name is mysqld-compatible
FALSE otherwise
TODO: Move to Instance_name class: Instance_name::is_mysqld_compatible().
*/
bool Instance::is_mysqld_compatible_name(const LEX_STRING *name) bool Instance::is_mysqld_compatible_name(const LEX_STRING *name)
{ {
return strcmp(name->str, DFLT_INSTANCE_NAME.str) == 0; return strcmp(name->str, DFLT_INSTANCE_NAME.str) == 0;
} }
/**
  Map a guardian state code to the name shown to clients.

  SYNOPSIS
    get_instance_state_name()
    state               state code to translate

  DESCRIPTION
    Class-internal helper; code outside the class should call
    Instance::get_state_name() instead.

  RETURN
    Pointer to a constant string with the state name, or NULL if the
    code is not one of the enum_instance_state values handled below.
*/

const char * Instance::get_instance_state_name(enum_instance_state state)
{
  /* Stays NULL when no case matches (also keeps the compiler quiet). */
  const char *state_name= NULL;

  switch (state) {
  case STOPPED:
    state_name= "offline";
    break;
  case NOT_STARTED:
    state_name= "not started";
    break;
  case STARTING:
    state_name= "starting";
    break;
  case STARTED:
    state_name= "online";
    break;
  case JUST_CRASHED:
    state_name= "failed";
    break;
  case CRASHED:
    state_name= "crashed";
    break;
  case CRASHED_AND_ABANDONED:
    state_name= "abandoned";
    break;
  case STOPPING:
    state_name= "stopping";
    break;
  }

  return state_name;
}
/**************************************************************************
}}}
**************************************************************************/
/* {{{ Constructor & destructor */ /**************************************************************************
{{{ Initialization & deinitialization.
**************************************************************************/
Instance::Instance() Instance::Instance()
:crashed(FALSE), :monitoring_thread_active(FALSE),
configured(FALSE) crashed(FALSE),
configured(FALSE),
/* mysqld_compatible is initialized in init() */
state(NOT_STARTED),
restart_counter(0),
crash_moment(0),
last_checked(0)
{ {
pthread_mutex_init(&LOCK_instance, 0); pthread_mutex_init(&LOCK_instance, 0);
pthread_cond_init(&COND_instance_stopped, 0);
} }
...@@ -427,13 +499,11 @@ Instance::~Instance() ...@@ -427,13 +499,11 @@ Instance::~Instance()
{ {
log_info("Instance '%s': destroying...", (const char *) get_name()->str); log_info("Instance '%s': destroying...", (const char *) get_name()->str);
pthread_cond_destroy(&COND_instance_stopped);
pthread_mutex_destroy(&LOCK_instance); pthread_mutex_destroy(&LOCK_instance);
} }
/* }}} */
/* /**
Initialize instance options. Initialize instance options.
SYNOPSIS SYNOPSIS
...@@ -453,7 +523,7 @@ bool Instance::init(const LEX_STRING *name_arg) ...@@ -453,7 +523,7 @@ bool Instance::init(const LEX_STRING *name_arg)
} }
/* /**
Complete instance options initialization. Complete instance options initialization.
SYNOPSIS SYNOPSIS
...@@ -474,7 +544,47 @@ bool Instance::complete_initialization() ...@@ -474,7 +544,47 @@ bool Instance::complete_initialization()
*/ */
} }
/* /**************************************************************************
}}}
**************************************************************************/
/**************************************************************************
{{{ Instance: public interface implementation.
**************************************************************************/
/**
  Tell whether the instance is currently in use.

  SYNOPSIS
    is_active()

  DESCRIPTION
    The instance counts as active as soon as one of the following holds
    (evaluated in this order, stopping at the first hit):
      - the Instance-monitoring thread is flagged as running;
      - the instance is guarded and its state is not STOPPED;
      - is_mysqld_running() reports that mysqld accepts connections.

  MT-NOTE: instance must be locked before calling the operation.

  RETURN
    TRUE  - instance is active
    FALSE - otherwise.
*/

bool Instance::is_active()
{
  /* Flag and state checks go first; the connection probe is tried last. */
  return monitoring_thread_active ||
         (is_guarded() && get_state() != STOPPED) ||
         is_mysqld_running();
}
/**
Determine if mysqld is accepting connections. Determine if mysqld is accepting connections.
SYNOPSIS SYNOPSIS
...@@ -484,7 +594,7 @@ bool Instance::complete_initialization() ...@@ -484,7 +594,7 @@ bool Instance::complete_initialization()
Try to connect to mysqld with fake login/password to check whether it is Try to connect to mysqld with fake login/password to check whether it is
accepting connections or not. accepting connections or not.
MT-NOTE: this operation must be called under acquired LOCK_instance. MT-NOTE: instance must be locked before calling the operation.
RETURN RETURN
TRUE - mysqld is alive and accept connections TRUE - mysqld is alive and accept connections
...@@ -508,8 +618,6 @@ bool Instance::is_mysqld_running() ...@@ -508,8 +618,6 @@ bool Instance::is_mysqld_running()
if (!port && !options.mysqld_socket) if (!port && !options.mysqld_socket)
port= SERVER_DEFAULT_PORT; port= SERVER_DEFAULT_PORT;
pthread_mutex_lock(&LOCK_instance);
mysql_init(&mysql); mysql_init(&mysql);
/* try to connect to a server with a fake username/password pair */ /* try to connect to a server with a fake username/password pair */
if (mysql_real_connect(&mysql, LOCAL_HOST, username, if (mysql_real_connect(&mysql, LOCAL_HOST, username,
...@@ -523,7 +631,6 @@ bool Instance::is_mysqld_running() ...@@ -523,7 +631,6 @@ bool Instance::is_mysqld_running()
*/ */
log_error("Instance '%s': was able to log into mysqld.", log_error("Instance '%s': was able to log into mysqld.",
(const char *) get_name()->str); (const char *) get_name()->str);
pthread_mutex_unlock(&LOCK_instance);
return_val= TRUE; /* server is alive */ return_val= TRUE; /* server is alive */
} }
else else
...@@ -531,145 +638,145 @@ bool Instance::is_mysqld_running() ...@@ -531,145 +638,145 @@ bool Instance::is_mysqld_running()
sizeof(access_denied_message) - 1)); sizeof(access_denied_message) - 1));
mysql_close(&mysql); mysql_close(&mysql);
pthread_mutex_unlock(&LOCK_instance);
return return_val; return return_val;
} }
/*
The method starts an instance. /**
Start mysqld.
SYNOPSIS SYNOPSIS
start() start_mysqld()
DESCRIPTION
Reset flags and start Instance Monitor thread, which will start mysqld.
MT-NOTE: instance must be locked before calling the operation.
RETURN RETURN
0 ok FALSE - ok
ER_CANNOT_START_INSTANCE Cannot start instance TRUE - could not start instance
ER_INSTANCE_ALREADY_STARTED The instance on the specified port/socket
is already started
*/ */
int Instance::start() bool Instance::start_mysqld()
{ {
/* clear crash flag */ Instance_monitor *instance_monitor;
pthread_mutex_lock(&LOCK_instance);
crashed= FALSE; /*
pthread_mutex_unlock(&LOCK_instance); Prepare instance to start Instance Monitor thread.
NOTE: It's important to set these actions here in order to avoid
race conditions -- these actions must be done under acquired lock on
Instance.
*/
crashed= FALSE;
monitoring_thread_active= TRUE;
if (configured && !is_mysqld_running())
{
Instance_monitor *instance_monitor;
remove_pid(); remove_pid();
/* Create and start the Instance Monitor thread. */
instance_monitor= new Instance_monitor(this); instance_monitor= new Instance_monitor(this);
if (instance_monitor == NULL || instance_monitor->start(Thread::DETACHED)) if (instance_monitor == NULL || instance_monitor->start(Thread::DETACHED))
{ {
delete instance_monitor; delete instance_monitor;
log_error("Instance::start(): failed to create the monitoring thread" monitoring_thread_active= FALSE;
" to start an instance");
return ER_CANNOT_START_INSTANCE;
}
/* The monitoring thread will delete itself when it's finished. */
return 0; log_error("Instance '%s': can not create instance monitor thread.",
} (const char *) get_name()->str);
/* The instance is started already or misconfigured. */
return configured ? ER_INSTANCE_ALREADY_STARTED : ER_INSTANCE_MISCONFIGURED;
}
/*
The method sets the crash flag and wakes all waiters on
COND_instance_stopped and COND_guardian
SYNOPSIS return TRUE;
set_crash_flag_n_wake_all() }
DESCRIPTION ++restart_counter;
The method is called when an instance is crashed or terminated.
In the former case it might indicate that guardian probably should
restart it.
RETURN /* The Instance Monitor thread will delete itself when it's finished. */
Function returns no value
*/
void Instance::set_crash_flag_n_wake_all() return FALSE;
{
/* set instance state to crashed */
pthread_mutex_lock(&LOCK_instance);
crashed= TRUE;
pthread_mutex_unlock(&LOCK_instance);
/*
Wake connection threads waiting for an instance to stop. This
is needed if a user issued command to stop an instance via
mysql connection. This is not the case if Guardian stop the thread.
*/
pthread_cond_signal(&COND_instance_stopped);
/* wake guardian */
pthread_cond_signal(&Manager::get_guardian()->COND_guardian);
} }
/* /**
Stop an instance. Stop mysqld.
SYNOPSIS SYNOPSIS
stop() stop_mysqld()
RETURN: DESCRIPTION
0 ok Try to stop mysqld gracefully. Otherwise kill it with SIGKILL.
ER_INSTANCE_IS_NOT_STARTED Looks like the instance it is not started
ER_STOP_INSTANCE mysql_shutdown reported an error MT-NOTE: instance must be locked before calling the operation.
RETURN
FALSE - ok
TRUE - could not stop the instance
*/ */
int Instance::stop() bool Instance::stop_mysqld()
{ {
struct timespec timeout; log_info("Instance '%s': stopping mysqld...",
uint waitchild= (uint) DEFAULT_SHUTDOWN_DELAY; (const char *) get_name()->str);
if (is_mysqld_running())
{
waitchild= options.get_shutdown_delay();
kill_mysqld(SIGTERM); kill_mysqld(SIGTERM);
/* sleep on condition to wait for SIGCHLD */
timeout.tv_sec= time(NULL) + waitchild; if (!wait_for_stop())
timeout.tv_nsec= 0;
if (pthread_mutex_lock(&LOCK_instance))
return ER_STOP_INSTANCE;
while (options.load_pid() != 0) /* while server isn't stopped */
{ {
int status; log_info("Instance '%s': mysqld stopped gracefully.",
(const char *) get_name()->str);
status= pthread_cond_timedwait(&COND_instance_stopped, return FALSE;
&LOCK_instance,
&timeout);
if (status == ETIMEDOUT || status == ETIME)
break;
} }
pthread_mutex_unlock(&LOCK_instance); log_info("Instance '%s': mysqld failed to stop gracefully within %d seconds.",
(const char *) get_name()->str,
(int) options.get_shutdown_delay());
log_info("Instance'%s': killing mysqld...",
(const char *) get_name()->str);
kill_mysqld(SIGKILL); kill_mysqld(SIGKILL);
return 0; if (!wait_for_stop())
{
log_info("Instance '%s': mysqld has been killed.",
(const char *) get_name()->str);
return FALSE;
} }
return ER_INSTANCE_IS_NOT_STARTED; log_info("Instance '%s': can not kill mysqld within %d seconds.",
(const char *) get_name()->str,
(int) options.get_shutdown_delay());
return TRUE;
} }
/* /**
Send signal to mysqld. Send signal to mysqld.
SYNOPSIS SYNOPSIS
kill_mysqld() kill_mysqld()
DESCRIPTION
Load pid from the pid file and send the given signal to that process.
If the signal is SIGKILL, remove the pid file after sending the signal.
MT-NOTE: instance must be locked before calling the operation.
TODO
This is too low-level and OS-specific an operation for the public interface.
Also, it has some implicit behaviour for SIGKILL signal. Probably, we
should have the following public operations instead:
- start_mysqld() -- as is;
- stop_mysqld -- request mysqld to shutdown gracefully (send SIGTERM);
don't wait for complete shutdown;
- wait_for_stop() (or join_mysqld()) -- wait for mysqld to stop within
time interval;
- kill_mysqld() -- request to terminate mysqld; don't wait for
completion.
These operations should also be used in Guardian to manage instances.
*/ */
void Instance::kill_mysqld(int signum) void Instance::kill_mysqld(int signum)
...@@ -707,27 +814,91 @@ void Instance::kill_mysqld(int signum) ...@@ -707,27 +814,91 @@ void Instance::kill_mysqld(int signum)
} }
} }
/*
Return crashed flag.
SYNOPSIS
is_crashed()
RETURN /**
TRUE - mysqld crashed Lock instance.
FALSE - mysqld hasn't crashed yet
*/ */
bool Instance::is_crashed() void Instance::lock()
{ {
bool val;
pthread_mutex_lock(&LOCK_instance); pthread_mutex_lock(&LOCK_instance);
val= crashed; }
/**
Unlock instance.
*/
void Instance::unlock()
{
pthread_mutex_unlock(&LOCK_instance); pthread_mutex_unlock(&LOCK_instance);
return val;
} }
/*
/**
  Produce a user-friendly name of the instance state.

  SYNOPSIS
    get_state_name()

  DESCRIPTION
    Works for guarded and non-guarded instances alike:
      - a misconfigured instance is always reported as "misconfigured";
      - a guarded instance reports the name of its precise state;
      - a non-guarded instance is reported as "online" or "offline"
        according to is_active().

  MT-NOTE: instance must be locked before calling the operation.

  TODO: Replace with the static get_state_name(state_code) function.
*/

const char *Instance::get_state_name()
{
  const char *name;

  if (!is_configured())
    name= "misconfigured";
  else if (is_guarded())
  {
    /* Guardian manages the instance, so its exact state is known. */
    name= get_instance_state_name(get_state());
  }
  else
  {
    /* No Guardian bookkeeping: only activity status can be reported. */
    name= is_active() ? "online" : "offline";
  }

  return name;
}
/**
  Clear the guarding statistics of the instance.

  SYNOPSIS
    reset_stat()

  DESCRIPTION
    Zeroes the restart counter and both time stamps (crash moment and
    last-checked time) that are used for guarding the instance.

  MT-NOTE: instance must be locked before calling the operation.

  TODO: Make private.
*/

void Instance::reset_stat()
{
  /* Both time stamps go back to "never". */
  last_checked= crash_moment= 0;

  /* No restart attempts have been made since the reset. */
  restart_counter= 0;
}
/**************************************************************************
}}}
**************************************************************************/
/**************************************************************************
{{{ Instance: implementation of private operations.
**************************************************************************/
/**
Remove pid file. Remove pid file.
*/ */
...@@ -744,3 +915,36 @@ void Instance::remove_pid() ...@@ -744,3 +915,36 @@ void Instance::remove_pid()
(const char *) options.instance_name.str); (const char *) options.instance_name.str);
} }
} }
/**
  Wait for mysqld to stop within shutdown interval.

  SYNOPSIS
    wait_for_stop()

  DESCRIPTION
    Polls about every 0.3 seconds until the pid file yields no pid and
    mysqld no longer accepts connections, or until the shutdown delay
    taken from the instance options has elapsed. The stop condition is
    always checked at least once, even with a zero delay.

    Time stamps are kept in time_t: storing the result of time() in an int
    would truncate it on platforms where time_t is wider than int.

  RETURN
    FALSE - mysqld has stopped
    TRUE  - mysqld is still there after the timeout
*/

bool Instance::wait_for_stop()
{
  const time_t start_time= time(NULL);
  const time_t finish_time= start_time + options.get_shutdown_delay();

  log_info("Instance '%s': waiting for mysqld to stop "
           "(timeout: %d seconds)...",
           (const char *) get_name()->str,
           (int) options.get_shutdown_delay());

  while (true)
  {
    /* Stopped means: no pid to load and no server answering connections. */
    if (options.load_pid() == 0 && !is_mysqld_running())
      return FALSE;

    if (time(NULL) >= finish_time)
      return TRUE;

    /* Sleep for 0.3 sec and check again. */

    my_sleep(300000);
  }
}
/**************************************************************************
}}}
**************************************************************************/
...@@ -30,7 +30,7 @@ class Instance_map; ...@@ -30,7 +30,7 @@ class Instance_map;
class Thread_registry; class Thread_registry;
/* /**
Instance_name -- the class represents instance name -- a string of length Instance_name -- the class represents instance name -- a string of length
less than MAX_INSTANCE_NAME_SIZE. less than MAX_INSTANCE_NAME_SIZE.
...@@ -68,72 +68,127 @@ private: ...@@ -68,72 +68,127 @@ private:
class Instance class Instance
{ {
public: public:
/* /* States of an instance. */
The following two constants defines name of the default mysqld-instance enum enum_instance_state
("mysqld"). {
STOPPED,
NOT_STARTED,
STARTING,
STARTED,
JUST_CRASHED,
CRASHED,
CRASHED_AND_ABANDONED,
STOPPING
};
public:
/**
The constant defines name of the default mysqld-instance ("mysqld").
*/ */
static const LEX_STRING DFLT_INSTANCE_NAME; static const LEX_STRING DFLT_INSTANCE_NAME;
public: public:
/*
The operation is intended to check whether string is a well-formed
instance name or not.
*/
static bool is_name_valid(const LEX_STRING *name); static bool is_name_valid(const LEX_STRING *name);
/*
The operation is intended to check if the given instance name is
mysqld-compatible or not.
*/
static bool is_mysqld_compatible_name(const LEX_STRING *name); static bool is_mysqld_compatible_name(const LEX_STRING *name);
public: public:
Instance(); Instance();
~Instance(); ~Instance();
bool init(const LEX_STRING *name_arg); bool init(const LEX_STRING *name_arg);
bool complete_initialization(); bool complete_initialization();
public:
bool is_active();
bool is_mysqld_running(); bool is_mysqld_running();
int start();
int stop(); bool start_mysqld();
/* send a signal to the instance */ bool stop_mysqld();
void kill_mysqld(int signo); void kill_mysqld(int signo);
bool is_crashed();
void set_crash_flag_n_wake_all();
/* void lock();
void unlock();
const char *get_state_name();
void reset_stat();
public:
/**
The operation is intended to check if the instance is mysqld-compatible The operation is intended to check if the instance is mysqld-compatible
or not. or not.
*/ */
inline bool is_mysqld_compatible() const; inline bool is_mysqld_compatible() const;
/* /**
The operation is intended to check if the instance is configured properly The operation is intended to check if the instance is configured properly
or not. Misconfigured instances are not managed. or not. Misconfigured instances are not managed.
*/ */
inline bool is_configured() const; inline bool is_configured() const;
/**
The operation returns TRUE if the instance is guarded and FALSE otherwise.
*/
inline bool is_guarded() const;
/**
The operation returns name of the instance.
*/
inline const LEX_STRING *get_name() const; inline const LEX_STRING *get_name() const;
/**
The operation returns the current state of the instance.
NOTE: At the moment should be used only for guarded instances.
*/
inline enum_instance_state get_state() const;
/**
The operation changes the state of the instance.
NOTE: At the moment should be used only for guarded instances.
TODO: Make private.
*/
inline void set_state(enum_instance_state new_state);
/**
The operation returns crashed flag.
*/
inline bool is_crashed();
public: public:
enum { DEFAULT_SHUTDOWN_DELAY= 35 }; /**
This attribute contains instance options.
TODO: Make private.
*/
Instance_options options; Instance_options options;
private: private:
/* This attributes is a flag, specifies if the instance has been crashed. */ /**
monitoring_thread_active is TRUE if there is a thread that monitors the
corresponding mysqld-process.
*/
bool monitoring_thread_active;
/**
crashed is TRUE when corresponding mysqld-process has died after
start.
*/
bool crashed; bool crashed;
/* /**
This attribute specifies if the instance is configured properly or not. configured is TRUE when the instance is configured and FALSE otherwise.
Misconfigured instances are not managed. Misconfigured instances are not managed.
*/ */
bool configured; bool configured;
/* /*
This attribute specifies whether the instance is mysqld-compatible or not. mysqld_compatible specifies whether the instance is mysqld-compatible
Mysqld-compatible instances can contain only mysqld-specific options. or not. Mysqld-compatible instances can contain only mysqld-specific
At the moment an instance is mysqld-compatible if its name is "mysqld". options. At the moment an instance is mysqld-compatible if its name is
"mysqld".
The idea is that [mysqld] section should contain only mysqld-specific The idea is that [mysqld] section should contain only mysqld-specific
options (no Instance Manager-specific options) to be readable by mysqld options (no Instance Manager-specific options) to be readable by mysqld
...@@ -142,18 +197,36 @@ private: ...@@ -142,18 +197,36 @@ private:
bool mysqld_compatible; bool mysqld_compatible;
/* /*
Mutex protecting the instance. Currently we use it to avoid the Mutex protecting the instance.
double start of the instance. This happens when the instance is starting
and we issue the start command once more.
*/ */
pthread_mutex_t LOCK_instance; pthread_mutex_t LOCK_instance;
/*
This condition variable is used to wake threads waiting for instance to
stop in Instance::stop()
*/
pthread_cond_t COND_instance_stopped;
private:
/* Guarded-instance attributes. */
/* state of an instance (i.e. STARTED, CRASHED, etc.) */
enum_instance_state state;
public:
/* the number of attempts to restart instance (cleaned up at success) */
int restart_counter;
/* triggered at a crash */
time_t crash_moment;
/* General time field. Used to provide timeouts (at shutdown and restart) */
time_t last_checked;
private:
static const char *get_instance_state_name(enum_instance_state state);
private:
void remove_pid(); void remove_pid();
bool wait_for_stop();
private:
friend class Instance_monitor;
}; };
...@@ -169,9 +242,33 @@ inline bool Instance::is_configured() const ...@@ -169,9 +242,33 @@ inline bool Instance::is_configured() const
} }
/* An instance is guarded unless its options carry the nonguarded flag. */
inline bool Instance::is_guarded() const
{
return !options.nonguarded;
}
inline const LEX_STRING *Instance::get_name() const inline const LEX_STRING *Instance::get_name() const
{ {
return &options.instance_name; return &options.instance_name;
} }
/* Plain accessor: returns the stored state code; no locking is done here. */
inline Instance::enum_instance_state Instance::get_state() const
{
return state;
}
/*
Plain mutator: overwrites the stored state code with new_state. No
validation of the transition and no locking is done here.
*/
inline void Instance::set_state(enum_instance_state new_state)
{
state= new_state;
}
/*
Returns the crashed flag as is, without taking LOCK_instance.
NOTE(review): presumably the caller is expected to hold the instance
lock, as with the other MT-NOTE operations -- confirm against callers.
*/
inline bool Instance::is_crashed()
{
return crashed;
}
#endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_INSTANCE_H */ #endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_INSTANCE_H */
...@@ -25,26 +25,18 @@ ...@@ -25,26 +25,18 @@
#include <mysql_com.h> #include <mysql_com.h>
#include "buffer.h" #include "buffer.h"
#include "guardian.h"
#include "instance.h" #include "instance.h"
#include "log.h" #include "log.h"
#include "manager.h"
#include "mysqld_error.h" #include "mysqld_error.h"
#include "mysql_manager_error.h" #include "mysql_manager_error.h"
#include "options.h" #include "options.h"
#include "priv.h" #include "priv.h"
/*
Note: As we are going to suppost different types of connections,
we shouldn't have connection-specific functions. To avoid it we could
put such functions to the Command-derived class instead.
The command could be easily constructed for a specific connection if
we would provide a special factory for each connection.
*/
C_MODE_START C_MODE_START
/* Procedure needed for HASH initialization */ /**
HASH-routines: get key of instance for storing in hash.
*/
static byte* get_instance_key(const byte* u, uint* len, static byte* get_instance_key(const byte* u, uint* len,
my_bool __attribute__((unused)) t) my_bool __attribute__((unused)) t)
...@@ -54,14 +46,18 @@ static byte* get_instance_key(const byte* u, uint* len, ...@@ -54,14 +46,18 @@ static byte* get_instance_key(const byte* u, uint* len,
return (byte *) instance->options.instance_name.str; return (byte *) instance->options.instance_name.str;
} }
/**
HASH-routines: cleanup handler.
*/
static void delete_instance(void *u) static void delete_instance(void *u)
{ {
Instance *instance= (Instance *) u; Instance *instance= (Instance *) u;
delete instance; delete instance;
} }
/* /**
The option handler to pass to the process_default_option_files finction. The option handler to pass to the process_default_option_files function.
SYNOPSIS SYNOPSIS
process_option() process_option()
...@@ -96,7 +92,7 @@ static int process_option(void *ctx, const char *group, const char *option) ...@@ -96,7 +92,7 @@ static int process_option(void *ctx, const char *group, const char *option)
C_MODE_END C_MODE_END
/* /**
Parse option string. Parse option string.
SYNOPSIS SYNOPSIS
...@@ -137,7 +133,7 @@ static void parse_option(const char *option_str, ...@@ -137,7 +133,7 @@ static void parse_option(const char *option_str,
} }
/* /**
Process one option from the configuration file. Process one option from the configuration file.
SYNOPSIS SYNOPSIS
...@@ -151,6 +147,10 @@ static void parse_option(const char *option_str, ...@@ -151,6 +147,10 @@ static void parse_option(const char *option_str,
process_option(). The caller ensures proper locking process_option(). The caller ensures proper locking
of the instance map object. of the instance map object.
*/ */
/*
Process a given option and assign it to appropriate instance. This is
required for the option handler, passed to my_search_option_files().
*/
int Instance_map::process_one_option(const LEX_STRING *group, int Instance_map::process_one_option(const LEX_STRING *group,
const char *option) const char *option)
...@@ -213,92 +213,97 @@ int Instance_map::process_one_option(const LEX_STRING *group, ...@@ -213,92 +213,97 @@ int Instance_map::process_one_option(const LEX_STRING *group,
} }
/**
Instance_map constructor.
*/
Instance_map::Instance_map() Instance_map::Instance_map()
{ {
pthread_mutex_init(&LOCK_instance_map, 0); pthread_mutex_init(&LOCK_instance_map, 0);
} }
/**
Initialize Instance_map internals.
*/
bool Instance_map::init() bool Instance_map::init()
{ {
return hash_init(&hash, default_charset_info, START_HASH_SIZE, 0, 0, return hash_init(&hash, default_charset_info, START_HASH_SIZE, 0, 0,
get_instance_key, delete_instance, 0); get_instance_key, delete_instance, 0);
} }
/**
Reset Instance_map data.

Frees the internal hash and then calls init() to set the hash up again.
The return value is whatever init() returns.

NOTE(review): init() registers delete_instance as the hash cleanup
handler, so hash_free() presumably deletes every stored Instance --
confirm that no other thread still uses them when this is called.
*/
bool Instance_map::reset()
{
hash_free(&hash);
return init();
}
/**
Instance_map destructor.
*/
Instance_map::~Instance_map() Instance_map::~Instance_map()
{ {
pthread_mutex_lock(&LOCK_instance_map); lock();
/*
NOTE: it's necessary to synchronize on each instance before removal,
because Instance-monitoring thread can still be alive and hold the mutex
(because it is detached and we have no control over it).
*/
while (true)
{
Iterator it(this);
Instance *instance= it.next();
if (!instance)
break;
instance->lock();
instance->unlock();
remove_instance(instance);
}
hash_free(&hash); hash_free(&hash);
pthread_mutex_unlock(&LOCK_instance_map); unlock();
pthread_mutex_destroy(&LOCK_instance_map); pthread_mutex_destroy(&LOCK_instance_map);
} }
/**
Lock Instance_map.
*/
void Instance_map::lock() void Instance_map::lock()
{ {
pthread_mutex_lock(&LOCK_instance_map); pthread_mutex_lock(&LOCK_instance_map);
} }
/**
Unlock Instance_map.
*/
void Instance_map::unlock() void Instance_map::unlock()
{ {
pthread_mutex_unlock(&LOCK_instance_map); pthread_mutex_unlock(&LOCK_instance_map);
} }
/*
Re-read instance configuration file.
SYNOPSIS
Instance_map::flush_instances()
DESCRIPTION /**
This function will: Check if there is an active instance or not.
- clear the current list of instances. This removes both
running and stopped instances.
- load a new instance configuration from the file.
- pass on the new map to the guardian thread: it will start
all instances that are marked `guarded' and not yet started.
Note, as the check whether an instance is started is currently
very simple (returns TRUE if there is a MySQL server running
at the given port), this function has some peculiar
side-effects:
* if the port number of a running instance was changed, the
old instance is forgotten, even if it was running. The new
instance will be started at the new port.
* if the configuration was changed in a way that two
instances swapped their port numbers, the guardian thread
will not notice that and simply report that both instances
are configured successfully and running.
In order to avoid such side effects one should never call
FLUSH INSTANCES without prior stop of all running instances.
NOTE: The operation should be invoked with the following locks acquired:
- Guardian;
- Instance_map;
*/ */
int Instance_map::flush_instances()
{
int rc;
/*
Guardian thread relies on the instance map repository for guarding
instances. This is why refreshing instance map, we need (1) to stop
guardian (2) reload the instance map (3) reinitialize the guardian
with new instances.
*/
hash_free(&hash);
hash_init(&hash, default_charset_info, START_HASH_SIZE, 0, 0,
get_instance_key, delete_instance, 0);
rc= load();
/* don't init guardian if we failed to load instances */
if (!rc)
guardian->init(); // TODO: check error status.
return rc;
}
bool Instance_map::is_there_active_instance() bool Instance_map::is_there_active_instance()
{ {
Instance *instance; Instance *instance;
...@@ -306,29 +311,50 @@ bool Instance_map::is_there_active_instance() ...@@ -306,29 +311,50 @@ bool Instance_map::is_there_active_instance()
while ((instance= iterator.next())) while ((instance= iterator.next()))
{ {
if (guardian->find_instance_node(instance) != NULL || bool active_instance_found;
instance->is_mysqld_running())
{ instance->lock();
active_instance_found= instance->is_active();
instance->unlock();
if (active_instance_found)
return TRUE; return TRUE;
} }
}
return FALSE; return FALSE;
} }
/**
Add an instance into the internal hash.
MT-NOTE: Instance Map must be locked before calling the operation.
*/
int Instance_map::add_instance(Instance *instance) int Instance_map::add_instance(Instance *instance)
{ {
return my_hash_insert(&hash, (byte *) instance); return my_hash_insert(&hash, (byte *) instance);
} }
/**
Remove instance from the internal hash.
MT-NOTE: Instance Map must be locked before calling the operation.
*/
int Instance_map::remove_instance(Instance *instance) int Instance_map::remove_instance(Instance *instance)
{ {
return hash_delete(&hash, (byte *) instance); return hash_delete(&hash, (byte *) instance);
} }
/**
Create a new instance and register it in the internal hash.
MT-NOTE: Instance Map must be locked before calling the operation.
*/
int Instance_map::create_instance(const LEX_STRING *instance_name, int Instance_map::create_instance(const LEX_STRING *instance_name,
const Named_value_arr *options) const Named_value_arr *options)
{ {
...@@ -392,12 +418,22 @@ int Instance_map::create_instance(const LEX_STRING *instance_name, ...@@ -392,12 +418,22 @@ int Instance_map::create_instance(const LEX_STRING *instance_name,
} }
/**
Return a pointer to the instance or NULL, if there is no such instance.
MT-NOTE: Instance Map must be locked before calling the operation.
*/
Instance * Instance_map::find(const LEX_STRING *name) Instance * Instance_map::find(const LEX_STRING *name)
{ {
return (Instance *) hash_search(&hash, (byte *) name->str, name->length); return (Instance *) hash_search(&hash, (byte *) name->str, name->length);
} }
/**
Init instances command line arguments after all options have been loaded.
*/
bool Instance_map::complete_initialization() bool Instance_map::complete_initialization()
{ {
bool mysqld_found; bool mysqld_found;
...@@ -455,7 +491,10 @@ bool Instance_map::complete_initialization() ...@@ -455,7 +491,10 @@ bool Instance_map::complete_initialization()
} }
/* load options from config files and create appropriate instance structures */ /**
Load options from config files and create appropriate instance
structures.
*/
int Instance_map::load() int Instance_map::load()
{ {
...@@ -505,8 +544,9 @@ int Instance_map::load() ...@@ -505,8 +544,9 @@ int Instance_map::load()
} }
/*--- Implementation of the Instance map iterator class ---*/ /*************************************************************************
{{{ Instance_map::Iterator implementation.
*************************************************************************/
void Instance_map::Iterator::go_to_first() void Instance_map::Iterator::go_to_first()
{ {
...@@ -522,29 +562,12 @@ Instance *Instance_map::Iterator::next() ...@@ -522,29 +562,12 @@ Instance *Instance_map::Iterator::next()
return NULL; return NULL;
} }
/*************************************************************************
const char *Instance_map::get_instance_state_name(Instance *instance) }}}
{ *************************************************************************/
LIST *instance_node;
if (!instance->is_configured())
return "misconfigured";
if ((instance_node= guardian->find_instance_node(instance)) != NULL)
{
/* The instance is managed by Guardian: we can report precise state. */
return Guardian::get_instance_state_name(
guardian->get_instance_state(instance_node));
}
/* The instance is not managed by Guardian: we can report status only. */
return instance->is_mysqld_running() ? "online" : "offline";
}
/* /**
Create a new configuration section for mysqld-instance in the config file. Create a new configuration section for mysqld-instance in the config file.
SYNOPSIS SYNOPSIS
......
...@@ -37,14 +37,17 @@ extern int create_instance_in_file(const LEX_STRING *instance_name, ...@@ -37,14 +37,17 @@ extern int create_instance_in_file(const LEX_STRING *instance_name,
const Named_value_arr *options); const Named_value_arr *options);
/* /**
Instance_map - stores all existing instances Instance_map - stores all existing instances
*/ */
class Instance_map class Instance_map
{ {
public: public:
/* Instance_map iterator */ /**
Instance_map iterator
*/
class Iterator class Iterator
{ {
private: private:
...@@ -58,79 +61,43 @@ public: ...@@ -58,79 +61,43 @@ public:
void go_to_first(); void go_to_first();
Instance *next(); Instance *next();
}; };
friend class Iterator;
public: public:
/*
Return a pointer to the instance or NULL, if there is no such instance.
MT-NOTE: must be called under acquired lock.
*/
Instance *find(const LEX_STRING *name); Instance *find(const LEX_STRING *name);
/* Clear the configuration cache and reload the configuration file. */
int flush_instances();
/* The operation is used to check if there is an active instance or not. */
bool is_there_active_instance(); bool is_there_active_instance();
void lock(); void lock();
void unlock(); void unlock();
bool init(); bool init();
bool reset();
/* int load();
Process a given option and assign it to the appropriate instance. This is
required for the option handler, passed to my_search_option_files().
*/
int process_one_option(const LEX_STRING *group, const char *option);
/* int process_one_option(const LEX_STRING *group, const char *option);
Add an instance into the internal hash.
MT-NOTE: the operation must be called under acquired lock.
*/
int add_instance(Instance *instance); int add_instance(Instance *instance);
/*
Remove instance from the internal hash.
MT-NOTE: the operation must be called under acquired lock.
*/
int remove_instance(Instance *instance); int remove_instance(Instance *instance);
/*
Create a new instance and register it in the internal hash.
MT-NOTE: the operation must be called under acquired lock.
*/
int create_instance(const LEX_STRING *instance_name, int create_instance(const LEX_STRING *instance_name,
const Named_value_arr *options); const Named_value_arr *options);
public:
Instance_map(); Instance_map();
~Instance_map(); ~Instance_map();
/*
Retrieve client state name of the given instance.
MT-NOTE: the operation must be called under acquired locks of the following
objects:
- Instance_map;
- Guardian;
*/
const char *get_instance_state_name(Instance *instance);
public:
const char *mysqld_path;
Guardian *guardian;
private: private:
/* loads options from config files */
int load();
/* inits instances argv's after all options have been loaded */
bool complete_initialization(); bool complete_initialization();
private: private:
enum { START_HASH_SIZE = 16 }; enum { START_HASH_SIZE = 16 };
pthread_mutex_t LOCK_instance_map; pthread_mutex_t LOCK_instance_map;
HASH hash; HASH hash;
private:
friend class Iterator;
}; };
#endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_INSTANCE_MAP_H */ #endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_INSTANCE_MAP_H */
...@@ -46,7 +46,6 @@ public: ...@@ -46,7 +46,6 @@ public:
Instance_options(); Instance_options();
~Instance_options(); ~Instance_options();
/* fills in argv */
bool complete_initialization(); bool complete_initialization();
bool set_option(Named_value *option); bool set_option(Named_value *option);
......
...@@ -37,6 +37,9 @@ ...@@ -37,6 +37,9 @@
#include "user_map.h" #include "user_map.h"
/**********************************************************************
{{{ Platform-specific implementation.
**********************************************************************/
#ifndef __WIN__ #ifndef __WIN__
void set_signals(sigset_t *mask) void set_signals(sigset_t *mask)
...@@ -92,9 +95,13 @@ int my_sigwait(const sigset_t *set, int *sig) ...@@ -92,9 +95,13 @@ int my_sigwait(const sigset_t *set, int *sig)
#endif #endif
/**********************************************************************
}}}
**********************************************************************/
/********************************************************************** /**********************************************************************
Implementation of checking the actual thread model. {{{ Implementation of checking the actual thread model.
***********************************************************************/ ***********************************************************************/
namespace { /* no-indent */ namespace { /* no-indent */
...@@ -137,6 +144,10 @@ bool check_if_linux_threads(bool *linux_threads) ...@@ -137,6 +144,10 @@ bool check_if_linux_threads(bool *linux_threads)
} }
/**********************************************************************
}}}
***********************************************************************/
/********************************************************************** /**********************************************************************
Manager implementation Manager implementation
...@@ -152,23 +163,35 @@ bool Manager::linux_threads; ...@@ -152,23 +163,35 @@ bool Manager::linux_threads;
#endif // __WIN__ #endif // __WIN__
/**
Request shutdown of guardian and threads registered in Thread_registry.
SYNOPSIS
stop_all_threads()
*/
void Manager::stop_all_threads() void Manager::stop_all_threads()
{ {
/* /*
Let guardian thread know that it should break its processing cycle, Let Guardian thread know that it should break its processing cycle,
once it wakes up. once it wakes up.
*/ */
p_guardian->request_shutdown(); p_guardian->request_shutdown();
/* wake guardian */
pthread_cond_signal(&p_guardian->COND_guardian); /* Stop all threads. */
/* stop all threads */
p_thread_registry->deliver_shutdown(); p_thread_registry->deliver_shutdown();
} }
/* /**
manager - entry point to the main instance manager process: start Main manager function.
listener thread, write pid file and enter into signal handling.
SYNOPSIS
main()
DESCRIPTION
This is an entry point to the main instance manager process:
start listener thread, write pid file and enter into signal handling.
See also comments in mysqlmanager.cc to picture general Instance Manager See also comments in mysqlmanager.cc to picture general Instance Manager
architecture. architecture.
...@@ -194,22 +217,33 @@ int Manager::main() ...@@ -194,22 +217,33 @@ int Manager::main()
(const char *) (linux_threads ? "LINUX threads" : "POSIX threads")); (const char *) (linux_threads ? "LINUX threads" : "POSIX threads"));
#endif // __WIN__ #endif // __WIN__
Thread_registry thread_registry;
/* /*
All objects created in the manager() function live as long as All objects created in the Manager object live as long as thread_registry
thread_registry lives, and thread_registry is alive until there are lives, and thread_registry is alive until there are working threads.
working threads.
There are two main purposes of the Thread Registry:
1. Interrupt blocking I/O and signal condition variables in case of
shutdown;
2. Wait for detached threads before shutting down the main thread.
NOTE:
1. Handling shutdown can be done in a more elegant manner by introducing
an Event (or Condition) object with support for logical operations.
2. Using Thread Registry to wait for detached threads is definitely not
the best way, because when Thread Registry unregisters a thread, the
thread is still alive. An accurate way to wait for threads to stop is
to avoid detached threads and join all threads before shutdown.
*/ */
Thread_registry thread_registry;
User_map user_map; User_map user_map;
Instance_map instance_map; Instance_map instance_map;
Guardian guardian(&thread_registry, &instance_map, Guardian guardian(&thread_registry, &instance_map);
Options::Main::monitoring_interval);
Listener listener(&thread_registry, &user_map); Listener listener(&thread_registry, &user_map);
p_instance_map= &instance_map; p_instance_map= &instance_map;
p_guardian= instance_map.guardian= &guardian; p_guardian= &guardian;
p_thread_registry= &thread_registry; p_thread_registry= &thread_registry;
p_user_map= &user_map; p_user_map= &user_map;
...@@ -249,7 +283,7 @@ int Manager::main() ...@@ -249,7 +283,7 @@ int Manager::main()
} }
} }
/* write Instance Manager pid file */ /* Write Instance Manager pid file. */
log_info("IM pid file: '%s'; PID: %d.", log_info("IM pid file: '%s'; PID: %d.",
(const char *) Options::Main::pid_file_name, (const char *) Options::Main::pid_file_name,
...@@ -290,6 +324,7 @@ int Manager::main() ...@@ -290,6 +324,7 @@ int Manager::main()
permitted to process instances. And before flush_instances() has permitted to process instances. And before flush_instances() has
completed, there are no instances to guard. completed, there are no instances to guard.
*/ */
if (guardian.start(Thread::DETACHED)) if (guardian.start(Thread::DETACHED))
{ {
log_error("Can not start Guardian thread."); log_error("Can not start Guardian thread.");
...@@ -298,22 +333,12 @@ int Manager::main() ...@@ -298,22 +333,12 @@ int Manager::main()
/* Load instances. */ /* Load instances. */
{ if (Manager::flush_instances())
instance_map.guardian->lock();
instance_map.lock();
int flush_instances_status= instance_map.flush_instances();
instance_map.unlock();
instance_map.guardian->unlock();
if (flush_instances_status)
{ {
log_error("Can not init instances repository."); log_error("Can not init instances repository.");
stop_all_threads(); stop_all_threads();
goto err; goto err;
} }
}
/* Initialize the Listener. */ /* Initialize the Listener. */
...@@ -328,7 +353,8 @@ int Manager::main() ...@@ -328,7 +353,8 @@ int Manager::main()
After the list of guarded instances have been initialized, After the list of guarded instances have been initialized,
Guardian should start them. Guardian should start them.
*/ */
pthread_cond_signal(&guardian.COND_guardian);
guardian.ping();
/* Main loop. */ /* Main loop. */
...@@ -381,7 +407,6 @@ int Manager::main() ...@@ -381,7 +407,6 @@ int Manager::main()
if (!guardian.is_stopped()) if (!guardian.is_stopped())
{ {
guardian.request_shutdown(); guardian.request_shutdown();
pthread_cond_signal(&guardian.COND_guardian);
} }
else else
{ {
...@@ -406,3 +431,64 @@ err: ...@@ -406,3 +431,64 @@ err:
#endif #endif
return rc; return rc;
} }
/**
Re-read instance configuration file.
SYNOPSIS
flush_instances()
DESCRIPTION
This function will:
- clear the current list of instances. This removes both
running and stopped instances.
- load a new instance configuration from the file.
- pass on the new map to the guardian thread: it will start
all instances that are marked `guarded' and not yet started.
Note, as the check whether an instance is started is currently
very simple (returns TRUE if there is a MySQL server running
at the given port), this function has some peculiar
side-effects:
* if the port number of a running instance was changed, the
old instance is forgotten, even if it was running. The new
instance will be started at the new port.
* if the configuration was changed in a way that two
instances swapped their port numbers, the guardian thread
will not notice that and simply report that both instances
are configured successfully and running.
In order to avoid such side effects one should never call
FLUSH INSTANCES without prior stop of all running instances.
*/
bool Manager::flush_instances()
{
  /* Stays TRUE (failure) unless every step below succeeds. */
  bool failed= TRUE;

  p_instance_map->lock();

  /*
    The steps are evaluated strictly in order and the chain stops at the
    first one that reports a problem:
      - there must be no active instance;
      - the instance map must be reset successfully;
      - the instances must be loaded successfully.
    Guardian is not initialized if any of the steps failed.
  */
  if (!p_instance_map->is_there_active_instance() &&
      !p_instance_map->reset() &&
      !p_instance_map->load())
  {
    get_guardian()->init(); /* TODO: check error status. */
    get_guardian()->ping();

    failed= FALSE;
  }

  /* Single exit point: the instance map lock is released on every path. */
  p_instance_map->unlock();

  return failed;
}
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE) #if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE)
#pragma interface #pragma interface
#endif #endif
#include <my_global.h> #include <my_global.h>
class Guardian; class Guardian;
...@@ -30,8 +31,12 @@ class Manager ...@@ -30,8 +31,12 @@ class Manager
{ {
public: public:
static int main(); static int main();
static bool flush_instances();
public:
/** /**
These methods return a non-zero value only for the duration These methods return a non-NULL value only for the duration
of main(). of main().
*/ */
static Instance_map *get_instance_map() { return p_instance_map; } static Instance_map *get_instance_map() { return p_instance_map; }
...@@ -39,6 +44,7 @@ public: ...@@ -39,6 +44,7 @@ public:
static Thread_registry *get_thread_registry() { return p_thread_registry; } static Thread_registry *get_thread_registry() { return p_thread_registry; }
static User_map *get_user_map() { return p_user_map; } static User_map *get_user_map() { return p_user_map; }
public:
#ifndef __WIN__ #ifndef __WIN__
static bool is_linux_threads() { return linux_threads; } static bool is_linux_threads() { return linux_threads; }
#endif // __WIN__ #endif // __WIN__
......
...@@ -78,7 +78,7 @@ int User::init(const char *line) ...@@ -78,7 +78,7 @@ int User::init(const char *line)
"User line: '%s'.", "User line: '%s'.",
(int) password_length, (int) password_length,
(int) SCRAMBLED_PASSWORD_CHAR_LENGTH, (int) SCRAMBLED_PASSWORD_CHAR_LENGTH,
line); (const char *) line);
return 1; return 1;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment