Commit b534ca4b authored by anozdrin/alik@booka's avatar anozdrin/alik@booka

Fix for the following bugs:

  - BUG#22306: STOP INSTANCE can not be applied for instances in Crashed,
    Failed and Abandoned;
  - BUG#23476: DROP INSTANCE does not work
  - BUG#23215: STOP INSTANCE takes too much time

BUG#22306:
The problem was that STOP INSTANCE checked that mysqld is up and running.
If it was not so, STOP INSTANCE reported an error. Now, STOP INSTANCE
reports an error if the instance has been started (mysqld can be down).

BUG#23476:
The problem was that DROP INSTANCE tried to stop inactive instance. The fix is
trivial.

BUG#23215:
The problem was that locks were not acquired properly, so the
instance-monitoring thread could not acquire the mutex, holded by the
query-processing thread.

The fix is to simplify locking scheme by moving instance-related information to
Instance-class out of Guardian-class. This allows to get rid of storing a
separate list of Instance-information in Guardian and keeping it synchronized
with the original list in Instance_map.
parent 931cf68b
......@@ -29,6 +29,7 @@
#include "guardian.h"
#include "instance_map.h"
#include "log.h"
#include "manager.h"
#include "messages.h"
#include "mysqld_error.h"
#include "mysql_manager_error.h"
......@@ -36,8 +37,11 @@
#include "priv.h"
#include "protocol.h"
/**************************************************************************
{{{ Static functions.
**************************************************************************/
/*
/**
modify_defaults_to_im_error -- a map of error codes of
mysys::modify_defaults_file() into Instance Manager error codes.
*/
......@@ -46,38 +50,25 @@ static const int modify_defaults_to_im_error[]= { 0, ER_OUT_OF_RESOURCES,
ER_ACCESS_OPTION_FILE };
/*
Add a string to a buffer.
/**
Parse version number from the version string.
SYNOPSIS
put_to_buff()
buff buffer to add the string
str string to add
position offset in the buff to add a string
parse_version_number()
version_str
version
version_size
DESCRIPTION
TODO
Function to add a string to the buffer. It is different from
store_to_protocol_packet, which is used in the protocol.cc.
The last one also stores the length of the string in a special way.
This is required for MySQL client/server protocol support only.
TODO: Move this function to Instance_options and parse version number
only once.
RETURN
0 - ok
1 - error occured
NOTE: This function is used only in SHOW INSTANCE STATUS statement at the
moment.
*/
static inline int put_to_buff(Buffer *buff, const char *str, uint *position)
{
uint len= strlen(str);
if (buff->append(*position, str, len))
return 1;
*position+= len;
return 0;
}
static int parse_version_number(const char *version_str, char *version,
uint version_size)
{
......@@ -102,6 +93,9 @@ static int parse_version_number(const char *version_str, char *version,
return 0;
}
/**************************************************************************
}}}
**************************************************************************/
/**************************************************************************
Implementation of Instance_name.
......@@ -122,7 +116,7 @@ Instance_name::Instance_name(const LEX_STRING *name)
Implementation of Show_instances.
**************************************************************************/
/*
/**
Implementation of SHOW INSTANCES statement.
Possible error codes:
......@@ -172,7 +166,6 @@ int Show_instances::write_data(st_net *net)
Instance *instance;
Instance_map::Iterator iterator(instance_map);
instance_map->guardian->lock();
instance_map->lock();
while ((instance= iterator.next()))
......@@ -180,20 +173,25 @@ int Show_instances::write_data(st_net *net)
Buffer send_buf; /* buffer for packets */
uint pos= 0;
instance->lock();
const char *instance_name= instance->options.instance_name.str;
const char *state_name= instance_map->get_instance_state_name(instance);
const char *state_name= instance->get_state_name();
if (store_to_protocol_packet(&send_buf, instance_name, &pos) ||
store_to_protocol_packet(&send_buf, state_name, &pos) ||
my_net_write(net, send_buf.buffer, pos))
{
err_status= TRUE;
break;
}
instance->unlock();
if (err_status)
break;
}
instance_map->unlock();
instance_map->guardian->unlock();
return err_status ? ER_OUT_OF_RESOURCES : 0;
}
......@@ -203,7 +201,7 @@ int Show_instances::write_data(st_net *net)
Implementation of Flush_instances.
**************************************************************************/
/*
/**
Implementation of FLUSH INSTANCES statement.
Possible error codes:
......@@ -213,36 +211,19 @@ int Show_instances::write_data(st_net *net)
int Flush_instances::execute(st_net *net, ulong connection_id)
{
instance_map->guardian->lock();
instance_map->lock();
if (instance_map->is_there_active_instance())
{
instance_map->unlock();
instance_map->guardian->unlock();
return ER_THERE_IS_ACTIVE_INSTACE;
}
if (instance_map->flush_instances())
{
instance_map->unlock();
instance_map->guardian->unlock();
if (Manager::flush_instances())
return ER_OUT_OF_RESOURCES;
}
instance_map->unlock();
instance_map->guardian->unlock();
return net_send_ok(net, connection_id, NULL) ? ER_OUT_OF_RESOURCES : 0;
}
/**************************************************************************
Implementation of Abstract_instance_cmd.
Implementation of Instance_cmd.
**************************************************************************/
Abstract_instance_cmd::Abstract_instance_cmd(const LEX_STRING *instance_name_arg)
:instance_name(instance_name_arg)
Instance_cmd::Instance_cmd(const LEX_STRING *instance_name_arg):
instance_name(instance_name_arg)
{
/*
MT-NOTE: we can not make a search for Instance object here,
......@@ -251,26 +232,39 @@ Abstract_instance_cmd::Abstract_instance_cmd(const LEX_STRING *instance_name_arg
}
/**************************************************************************
Implementation of Abstract_instance_cmd.
**************************************************************************/
Abstract_instance_cmd::Abstract_instance_cmd(
const LEX_STRING *instance_name_arg)
:Instance_cmd(instance_name_arg)
{
}
int Abstract_instance_cmd::execute(st_net *net, ulong connection_id)
{
int err_code;
Instance *instance;
instance_map->lock();
{
Instance *instance= instance_map->find(get_instance_name());
if (!instance)
{
instance_map->unlock();
return ER_BAD_INSTANCE_NAME;
}
instance= instance_map->find(get_instance_name());
err_code= execute_impl(net, instance);
if (!instance)
{
instance_map->unlock();
return ER_BAD_INSTANCE_NAME;
}
instance->lock();
instance_map->unlock();
err_code= execute_impl(net, instance);
instance->unlock();
if (!err_code)
err_code= send_ok_response(net, connection_id);
......@@ -288,7 +282,7 @@ Show_instance_status::Show_instance_status(const LEX_STRING *instance_name_arg)
}
/*
/**
Implementation of SHOW INSTANCE STATUS statement.
Possible error codes:
......@@ -363,19 +357,14 @@ int Show_instance_status::write_data(st_net *net, Instance *instance)
char version_num_buf[MAX_VERSION_LENGTH];
uint pos= 0;
const char *state_name;
const char *state_name= instance->get_state_name();
const char *version_tag= "unknown";
const char *version_num= "unknown";
const char *mysqld_compatible_status;
instance_map->guardian->lock();
state_name= instance_map->get_instance_state_name(instance);
mysqld_compatible_status= instance->is_mysqld_compatible() ? "yes" : "no";
instance_map->guardian->unlock();
const char *mysqld_compatible_status=
instance->is_mysqld_compatible() ? "yes" : "no";
if (instance->options.mysqld_version)
{
if (parse_version_number(instance->options.mysqld_version, version_num_buf,
sizeof(version_num_buf)))
return ER_OUT_OF_RESOURCES;
......@@ -409,7 +398,7 @@ Show_instance_options::Show_instance_options(
}
/*
/**
Implementation of SHOW INSTANCE OPTIONS statement.
Possible error codes:
......@@ -505,23 +494,33 @@ Start_instance::Start_instance(const LEX_STRING *instance_name_arg)
}
/*
/**
Implementation of START INSTANCE statement.
Possible error codes:
ER_BAD_INSTANCE_NAME The instance with the given name does not exist
ER_OUT_OF_RESOURCES Not enough resources to complete the operation
ER_INSTANCE_MISCONFIGURED The instance configuration is invalid
ER_INSTANCE_ALREADY_STARTED The instance is already started
ER_CANNOT_START_INSTANCE The instance could not have been started
TODO: as soon as this method operates only with Instance, we probably
should introduce a new method (execute_stop_instance()) in Instance and
just call it from here.
*/
int Start_instance::execute_impl(st_net * /* net */, Instance *instance)
{
int err_code;
if (!instance->is_configured())
return ER_INSTANCE_MISCONFIGURED;
if ((err_code= instance->start()))
return err_code;
if (instance->is_active())
return ER_INSTANCE_ALREADY_STARTED;
if (instance->start_mysqld())
return ER_CANNOT_START_INSTANCE;
if (!(instance->options.nonguarded))
instance_map->guardian->guard(instance);
instance->reset_stat();
instance->set_state(Instance::NOT_STARTED);
return 0;
}
......@@ -546,25 +545,26 @@ Stop_instance::Stop_instance(const LEX_STRING *instance_name_arg)
}
/*
/**
Implementation of STOP INSTANCE statement.
Possible error codes:
ER_BAD_INSTANCE_NAME The instance with the given name does not exist
ER_OUT_OF_RESOURCES Not enough resources to complete the operation
TODO: as soon as this method operates only with Instance, we probably
should introduce a new method (execute_stop_instance()) in Instance and
just call it from here.
*/
int Stop_instance::execute_impl(st_net * /* net */, Instance *instance)
{
int err_code;
if (!instance->is_active())
return ER_INSTANCE_IS_NOT_STARTED;
if (!(instance->options.nonguarded))
instance_map->guardian->stop_guard(instance);
instance->set_state(Instance::STOPPED);
if ((err_code= instance->stop()))
return err_code;
return 0;
return instance->stop_mysqld() ? ER_STOP_INSTANCE : 0;
}
......@@ -582,12 +582,12 @@ int Stop_instance::send_ok_response(st_net *net, ulong connection_id)
**************************************************************************/
Create_instance::Create_instance(const LEX_STRING *instance_name_arg)
:instance_name(instance_name_arg)
:Instance_cmd(instance_name_arg)
{
}
/*
/**
This operation initializes Create_instance object.
SYNOPSIS
......@@ -604,7 +604,7 @@ bool Create_instance::init(const char **text)
}
/*
/**
This operation parses CREATE INSTANCE options.
SYNOPSIS
......@@ -724,7 +724,7 @@ bool Create_instance::parse_args(const char **text)
}
/*
/**
Implementation of CREATE INSTANCE statement.
Possible error codes:
......@@ -736,6 +736,7 @@ bool Create_instance::parse_args(const char **text)
int Create_instance::execute(st_net *net, ulong connection_id)
{
int err_code;
Instance *instance;
/* Check that the name is valid and there is no instance with such name. */
......@@ -761,17 +762,26 @@ int Create_instance::execute(st_net *net, ulong connection_id)
return err_code;
}
instance= instance_map->find(get_instance_name());
DBUG_ASSERT(instance);
if ((err_code= create_instance_in_file(get_instance_name(), &options)))
{
Instance *instance= instance_map->find(get_instance_name());
if (instance)
instance_map->remove_instance(instance); /* instance is deleted here. */
instance_map->remove_instance(instance); /* instance is deleted here. */
instance_map->unlock();
return err_code;
}
/*
CREATE INSTANCE must not lead to start instance, even if it guarded.
TODO: The problem however is that if Instance Manager restarts after
creating instance, the instance will be restarted (see also BUG#19718).
*/
instance->set_state(Instance::STOPPED);
/* That's all. */
instance_map->unlock();
......@@ -790,12 +800,12 @@ int Create_instance::execute(st_net *net, ulong connection_id)
**************************************************************************/
Drop_instance::Drop_instance(const LEX_STRING *instance_name_arg)
:Abstract_instance_cmd(instance_name_arg)
:Instance_cmd(instance_name_arg)
{
}
/*
/**
Implementation of DROP INSTANCE statement.
Possible error codes:
......@@ -804,14 +814,38 @@ Drop_instance::Drop_instance(const LEX_STRING *instance_name_arg)
ER_OUT_OF_RESOURCES Not enough resources to complete the operation
*/
int Drop_instance::execute_impl(st_net * /* net */, Instance *instance)
int Drop_instance::execute(st_net *net, ulong connection_id)
{
int err_code;
Instance *instance;
/* Lock Guardian, then Instance_map. */
instance_map->lock();
/* Find an instance. */
instance= instance_map->find(get_instance_name());
if (!instance)
{
instance_map->unlock();
return ER_BAD_INSTANCE_NAME;
}
instance->lock();
/* Check that the instance is offline. */
if (instance_map->guardian->is_active(instance))
if (instance->is_active())
{
instance->unlock();
instance_map->unlock();
return ER_DROP_ACTIVE_INSTANCE;
}
/* Try to remove instance from the file. */
err_code= modify_defaults_file(Options::Main::config_file, NULL, NULL,
get_instance_name()->str, MY_REMOVE_SECTION);
......@@ -824,27 +858,30 @@ int Drop_instance::execute_impl(st_net * /* net */, Instance *instance)
(const char *) get_instance_name()->str,
(const char *) Options::Main::config_file,
(int) err_code);
}
if (err_code)
instance->unlock();
instance_map->unlock();
return modify_defaults_to_im_error[err_code];
}
/* Remove instance from the instance map hash and Guardian's list. */
/* Unlock the instance before destroy. */
if (!instance->options.nonguarded)
instance_map->guardian->stop_guard(instance);
instance->unlock();
if ((err_code= instance->stop()))
return err_code;
/*
Remove instance from the instance map
(the instance will be also destroyed here).
*/
instance_map->remove_instance(instance);
return 0;
}
/* Unlock the instance map. */
instance_map->unlock();
/* That's all: send ok. */
int Drop_instance::send_ok_response(st_net *net, ulong connection_id)
{
if (net_send_ok(net, connection_id, "Instance dropped"))
return ER_OUT_OF_RESOURCES;
......@@ -867,7 +904,7 @@ Show_instance_log::Show_instance_log(const LEX_STRING *instance_name_arg,
}
/*
/**
Implementation of SHOW INSTANCE LOG statement.
Possible error codes:
......@@ -1012,7 +1049,7 @@ Show_instance_log_files::Show_instance_log_files
}
/*
/**
Implementation of SHOW INSTANCE LOG FILES statement.
Possible error codes:
......@@ -1133,7 +1170,7 @@ int Show_instance_log_files::write_data(st_net *net, Instance *instance)
Implementation of Abstract_option_cmd.
**************************************************************************/
/*
/**
Instance_options_list -- a data class representing a list of options for
some instance.
*/
......@@ -1251,7 +1288,7 @@ bool Abstract_option_cmd::init(const char **text)
}
/*
/**
Correct the option file. The "skip" option is used to remove the found
option.
......@@ -1290,8 +1327,8 @@ int Abstract_option_cmd::correct_file(Instance *instance, Named_value *option,
}
/*
Implementation of SET statement.
/**
Lock Instance Map and call execute_impl().
Possible error codes:
ER_BAD_INSTANCE_NAME The instance with the given name does not exist
......@@ -1341,6 +1378,11 @@ Abstract_option_cmd::get_instance_options_list(const LEX_STRING *instance_name)
}
/**
Skeleton implementation of option-management command.
MT-NOTE: Instance Map is locked before calling this operation.
*/
int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
{
int err_code= 0;
......@@ -1352,12 +1394,18 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
Instance_options_list *lst=
(Instance_options_list *) hash_element(&instance_options_map, i);
bool instance_is_active;
lst->instance= instance_map->find(lst->get_instance_name());
if (!lst->instance)
return ER_BAD_INSTANCE_NAME;
if (instance_map->guardian->is_active(lst->instance))
lst->instance->lock();
instance_is_active= lst->instance->is_active();
lst->instance->unlock();
if (instance_is_active)
return ER_INSTANCE_IS_ACTIVE;
}
......@@ -1368,6 +1416,8 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
Instance_options_list *lst=
(Instance_options_list *) hash_element(&instance_options_map, i);
lst->instance->lock();
for (int j= 0; j < lst->options.get_size(); ++j)
{
Named_value option= lst->options.get_element(j);
......@@ -1377,6 +1427,8 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
break;
}
lst->instance->unlock();
if (err_code)
break;
}
......@@ -1392,7 +1444,7 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
Implementation of Set_option.
**************************************************************************/
/*
/**
This operation parses SET options.
SYNOPSIS
......@@ -1566,7 +1618,7 @@ int Set_option::process_option(Instance *instance, Named_value *option)
Implementation of Unset_option.
**************************************************************************/
/*
/**
This operation parses UNSET options.
SYNOPSIS
......@@ -1662,7 +1714,7 @@ bool Unset_option::parse_args(const char **text)
}
/*
/**
Implementation of UNSET statement.
Possible error codes:
......
......@@ -30,7 +30,7 @@
#endif
/*
/**
Print all instances of this instance manager.
Grammar: SHOW INSTANCES
*/
......@@ -50,7 +50,7 @@ private:
};
/*
/**
Reread configuration file and refresh internal cache.
Grammar: FLUSH INSTANCES
*/
......@@ -66,11 +66,50 @@ public:
};
/*
/**
Base class for Instance-specific commands
(commands that operate on one instance).
Instance_cmd extends Command class by:
- an attribute for storing instance name;
- code to initialize instance name in constructor;
- an accessor to get instance name.
*/
class Instance_cmd : public Command
{
public:
Instance_cmd(const LEX_STRING *instance_name_arg);
protected:
inline const LEX_STRING *get_instance_name() const
{
return instance_name.get_str();
}
private:
Instance_name instance_name;
};
/**
Abstract class for Instance-specific commands.
Abstract_instance_cmd extends Instance_cmd by providing a common
framework for writing command-implementations. Basically, the class
implements Command::execute() pure virtual function in the following
way:
- Lock Instance_map;
- Get an instance by name. Return an error, if there is no such
instance;
- Lock the instance;
- Unlock Instance_map;
- Call execute_impl(), which should be implemented in derived class;
- Unlock the instance;
- Send response to the client and return error status.
*/
class Abstract_instance_cmd: public Command
class Abstract_instance_cmd: public Instance_cmd
{
public:
Abstract_instance_cmd(const LEX_STRING *instance_name_arg);
......@@ -79,29 +118,24 @@ public:
virtual int execute(st_net *net, ulong connection_id);
protected:
/* MT-NOTE: this operation is called under acquired Instance_map's lock. */
/**
This operation is intended to contain command-specific implementation.
MT-NOTE: this operation is called under acquired Instance's lock.
*/
virtual int execute_impl(st_net *net, Instance *instance) = 0;
/*
/**
This operation is invoked on successful return of execute_impl() and is
intended to send closing data.
MT-NOTE: this operation is called under released Instance_map's lock.
MT-NOTE: this operation is called under released Instance's lock.
*/
virtual int send_ok_response(st_net *net, ulong connection_id) = 0;
protected:
inline const LEX_STRING *get_instance_name() const
{
return instance_name.get_str();
}
private:
Instance_name instance_name;
};
/*
/**
Print status of an instance.
Grammar: SHOW INSTANCE STATUS <instance_name>
*/
......@@ -121,7 +155,7 @@ private:
};
/*
/**
Print options of chosen instance.
Grammar: SHOW INSTANCE OPTIONS <instance_name>
*/
......@@ -141,7 +175,7 @@ private:
};
/*
/**
Start an instance.
Grammar: START INSTANCE <instance_name>
*/
......@@ -157,7 +191,7 @@ protected:
};
/*
/**
Stop an instance.
Grammar: STOP INSTANCE <instance_name>
*/
......@@ -173,12 +207,12 @@ protected:
};
/*
/**
Create an instance.
Grammar: CREATE INSTANCE <instance_name> [<options>]
*/
class Create_instance: public Command
class Create_instance: public Instance_cmd
{
public:
Create_instance(const LEX_STRING *instance_name_arg);
......@@ -189,22 +223,15 @@ public:
protected:
virtual int execute(st_net *net, ulong connection_id);
inline const LEX_STRING *get_instance_name() const
{
return instance_name.get_str();
}
private:
bool parse_args(const char **text);
private:
Instance_name instance_name;
Named_value_arr options;
};
/*
/**
Drop an instance.
Grammar: DROP INSTANCE <instance_name>
......@@ -213,18 +240,17 @@ private:
is removed from the instance map.
*/
class Drop_instance: public Abstract_instance_cmd
class Drop_instance: public Instance_cmd
{
public:
Drop_instance(const LEX_STRING *instance_name_arg);
protected:
virtual int execute_impl(st_net *net, Instance *instance);
virtual int send_ok_response(st_net *net, ulong connection_id);
virtual int execute(st_net *net, ulong connection_id);
};
/*
/**
Print requested part of the log.
Grammar:
SHOW <instance_name> LOG {ERROR | SLOW | GENERAL} size[, offset_from_end]
......@@ -252,7 +278,7 @@ private:
};
/*
/**
Shows the list of the log files, used by an instance.
Grammar: SHOW <instance_name> LOG FILES
*/
......@@ -272,7 +298,7 @@ private:
};
/*
/**
Abstract class for option-management commands.
*/
......@@ -312,7 +338,7 @@ private:
};
/*
/**
Set an option for the instance.
Grammar: SET instance_name.option[=option_value][, ...]
*/
......@@ -329,7 +355,7 @@ protected:
};
/*
/**
Remove option of the instance.
Grammar: UNSET instance_name.option[, ...]
*/
......@@ -346,7 +372,7 @@ protected:
};
/*
/**
Syntax error command.
This command is issued if parser reported a syntax error. We need it to
......
......@@ -28,101 +28,126 @@
#include "instance_map.h"
#include "log.h"
#include "mysql_manager_error.h"
#include "options.h"
const char *
Guardian::get_instance_state_name(enum_instance_state state)
{
switch (state) {
case NOT_STARTED:
return "offline";
case STARTING:
return "starting";
case STARTED:
return "online";
case JUST_CRASHED:
return "failed";
/*************************************************************************
{{{ Constructor & destructor.
*************************************************************************/
case CRASHED:
return "crashed";
case CRASHED_AND_ABANDONED:
return "abandoned";
case STOPPING:
return "stopping";
}
/**
Guardian constructor.
return NULL; /* just to ignore compiler warning. */
}
SYNOPSIS
Guardian()
thread_registry_arg
instance_map_arg
/* {{{ Constructor & destructor. */
DESCRIPTION
Nominal contructor intended for assigning references and initialize
trivial objects. Real initialization is made by init() method.
*/
Guardian::Guardian(Thread_registry *thread_registry_arg,
Instance_map *instance_map_arg,
uint monitoring_interval_arg)
:stopped(FALSE),
monitoring_interval(monitoring_interval_arg),
Instance_map *instance_map_arg)
:shutdown_requested(FALSE),
stopped(FALSE),
thread_registry(thread_registry_arg),
instance_map(instance_map_arg),
shutdown_requested(FALSE)
instance_map(instance_map_arg)
{
pthread_mutex_init(&LOCK_guardian, 0);
pthread_cond_init(&COND_guardian, 0);
init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0);
}
Guardian::~Guardian()
{
/* delay guardian destruction to the moment when no one needs it */
pthread_mutex_lock(&LOCK_guardian);
free_root(&alloc, MYF(0));
pthread_mutex_unlock(&LOCK_guardian);
/*
NOTE: it's necessary to synchronize here, because Guiardian thread can be
still alive an hold the mutex (because it is detached and we have no
control over it).
*/
lock();
unlock();
pthread_mutex_destroy(&LOCK_guardian);
pthread_cond_destroy(&COND_guardian);
}
/* }}} */
/*************************************************************************
}}}
*************************************************************************/
/**
Send request to stop Guardian.
SYNOPSIS
request_shutdown()
*/
void Guardian::request_shutdown()
{
pthread_mutex_lock(&LOCK_guardian);
/* STOP Instances or just clean up Guardian repository */
stop_instances();
lock();
shutdown_requested= TRUE;
pthread_mutex_unlock(&LOCK_guardian);
unlock();
ping();
}
void Guardian::process_instance(Instance *instance,
GUARD_NODE *current_node,
LIST **guarded_instances,
LIST *node)
/**
Process an instance.
SYNOPSIS
process_instance()
instance a pointer to the instance for processing
MT-NOTE:
- the given instance must be locked before calling this operation;
- Guardian must be locked before calling this operation.
*/
void Guardian::process_instance(Instance *instance)
{
uint waitchild= (uint) Instance::DEFAULT_SHUTDOWN_DELAY;
/* The amount of times, Guardian attempts to restart an instance */
int restart_retry= 100;
time_t current_time= time(NULL);
if (current_node->state == STOPPING)
if (instance->get_state() == Instance::STOPPING)
{
waitchild= instance->options.get_shutdown_delay();
/* This brach is executed during shutdown. */
/* this returns TRUE if and only if an instance was stopped for sure */
/* This returns TRUE if and only if an instance was stopped for sure. */
if (instance->is_crashed())
*guarded_instances= list_delete(*guarded_instances, node);
else if ( (uint) (current_time - current_node->last_checked) > waitchild)
{
log_info("Guardian: '%s' stopped.",
(const char *) instance->get_name()->str);
instance->set_state(Instance::STOPPED);
}
else if ((uint) (current_time - instance->last_checked) >=
instance->options.get_shutdown_delay())
{
log_info("Guardian: '%s' hasn't stopped within %d secs.",
(const char *) instance->get_name()->str,
(int) instance->options.get_shutdown_delay());
instance->kill_mysqld(SIGKILL);
/*
Later we do node= node->next. This is ok, as we are only removing
the node from the list. The pointer to the next one is still valid.
*/
*guarded_instances= list_delete(*guarded_instances, node);
log_info("Guardian: pretend that '%s' is killed.",
(const char *) instance->get_name()->str);
instance->set_state(Instance::STOPPED);
}
else
{
log_info("Guardian: waiting for '%s' to stop (%d secs left).",
(const char *) instance->get_name()->str,
(int) (instance->options.get_shutdown_delay() -
current_time + instance->last_checked));
}
return;
......@@ -133,83 +158,90 @@ void Guardian::process_instance(Instance *instance,
/* The instance can be contacted on it's port */
/* If STARTING also check that pidfile has been created */
if (current_node->state == STARTING &&
current_node->instance->options.load_pid() == 0)
if (instance->get_state() == Instance::STARTING &&
instance->options.load_pid() == 0)
{
/* Pid file not created yet, don't go to STARTED state yet */
}
else if (current_node->state != STARTED)
else if (instance->get_state() != Instance::STARTED)
{
/* clear status fields */
log_info("Guardian: '%s' is running, set state to STARTED.",
(const char *) instance->options.instance_name.str);
current_node->restart_counter= 0;
current_node->crash_moment= 0;
current_node->state= STARTED;
instance->reset_stat();
instance->set_state(Instance::STARTED);
}
}
else
{
switch (current_node->state) {
case NOT_STARTED:
switch (instance->get_state()) {
case Instance::NOT_STARTED:
log_info("Guardian: starting '%s'...",
(const char *) instance->options.instance_name.str);
/* NOTE, set state to STARTING _before_ start() is called */
current_node->state= STARTING;
instance->start();
current_node->last_checked= current_time;
break;
case STARTED: /* fallthrough */
case STARTING: /* let the instance start or crash */
if (instance->is_crashed())
{
current_node->crash_moment= current_time;
current_node->last_checked= current_time;
current_node->state= JUST_CRASHED;
/* fallthrough -- restart an instance immediately */
}
else
break;
case JUST_CRASHED:
if (current_time - current_node->crash_moment <= 2)
/* NOTE: set state to STARTING _before_ start() is called. */
instance->set_state(Instance::STARTING);
instance->last_checked= current_time;
instance->start_mysqld();
return;
case Instance::STARTED: /* fallthrough */
case Instance::STARTING: /* let the instance start or crash */
if (!instance->is_crashed())
return;
instance->crash_moment= current_time;
instance->last_checked= current_time;
instance->set_state(Instance::JUST_CRASHED);
/* fallthrough -- restart an instance immediately */
case Instance::JUST_CRASHED:
if (current_time - instance->crash_moment <= 2)
{
if (instance->is_crashed())
{
instance->start();
instance->start_mysqld();
log_info("Guardian: starting '%s'...",
(const char *) instance->options.instance_name.str);
}
}
else
current_node->state= CRASHED;
break;
case CRASHED: /* just regular restarts */
if (current_time - current_node->last_checked >
monitoring_interval)
instance->set_state(Instance::CRASHED);
return;
case Instance::CRASHED: /* just regular restarts */
if (current_time - instance->last_checked <=
Options::Main::monitoring_interval)
return;
if (instance->restart_counter < restart_retry)
{
if ((current_node->restart_counter < restart_retry))
{
if (instance->is_crashed())
{
instance->start();
current_node->last_checked= current_time;
current_node->restart_counter++;
log_info("Guardian: restarting '%s'...",
(const char *) instance->options.instance_name.str);
}
}
else
if (instance->is_crashed())
{
log_info("Guardian: can not start '%s'. "
"Abandoning attempts to (re)start it",
instance->start_mysqld();
instance->last_checked= current_time;
log_info("Guardian: restarting '%s'...",
(const char *) instance->options.instance_name.str);
current_node->state= CRASHED_AND_ABANDONED;
}
}
break;
case CRASHED_AND_ABANDONED:
break; /* do nothing */
else
{
log_info("Guardian: can not start '%s'. "
"Abandoning attempts to (re)start it",
(const char *) instance->options.instance_name.str);
instance->set_state(Instance::CRASHED_AND_ABANDONED);
}
return;
case Instance::CRASHED_AND_ABANDONED:
return; /* do nothing */
default:
DBUG_ASSERT(0);
}
......@@ -217,56 +249,78 @@ void Guardian::process_instance(Instance *instance,
}
/*
/**
Main function of Guardian thread.
SYNOPSIS
run()
DESCRIPTION
Check for all guarded instances and restart them if needed. If everything
is fine go and sleep for some time.
Check for all guarded instances and restart them if needed.
*/
void Guardian::run()
{
Instance *instance;
LIST *node;
struct timespec timeout;
log_info("Guardian: started.");
thread_registry->register_thread(&thread_info);
pthread_mutex_lock(&LOCK_guardian);
/* Loop, until all instances were shut down at the end. */
/* loop, until all instances were shut down at the end */
while (!(shutdown_requested && (guarded_instances == NULL)))
while (true)
{
node= guarded_instances;
Instance_map::Iterator instances_it(instance_map);
Instance *instance;
bool all_instances_stopped= TRUE;
instance_map->lock();
while (node != NULL)
while ((instance= instances_it.next()))
{
GUARD_NODE *current_node= (GUARD_NODE *) node->data;
instance= ((GUARD_NODE *) node->data)->instance;
process_instance(instance, current_node, &guarded_instances, node);
instance->lock();
node= node->next;
if (!instance->is_guarded() ||
instance->get_state() == Instance::STOPPED)
{
instance->unlock();
continue;
}
process_instance(instance);
if (instance->get_state() != Instance::STOPPED)
all_instances_stopped= FALSE;
instance->unlock();
}
timeout.tv_sec= time(NULL) + monitoring_interval;
instance_map->unlock();
lock();
if (shutdown_requested && all_instances_stopped)
{
log_info("Guardian: all guarded mysqlds stopped.");
stopped= TRUE;
unlock();
break;
}
timeout.tv_sec= time(NULL) + Options::Main::monitoring_interval;
timeout.tv_nsec= 0;
/* check the loop predicate before sleeping */
if (!(shutdown_requested && (!(guarded_instances))))
thread_registry->cond_timedwait(&thread_info, &COND_guardian,
&LOCK_guardian, &timeout);
thread_registry->cond_timedwait(&thread_info, &COND_guardian,
&LOCK_guardian, &timeout);
unlock();
}
log_info("Guardian: stopped.");
stopped= TRUE;
pthread_mutex_unlock(&LOCK_guardian);
/* now, when the Guardian is stopped we can stop the IM */
/* Now, when the Guardian is stopped we can stop the IM. */
thread_registry->unregister_thread(&thread_info);
thread_registry->request_shutdown();
......@@ -274,129 +328,65 @@ void Guardian::run()
}
int Guardian::is_stopped()
/**
Return the value of stopped flag.
*/
bool Guardian::is_stopped()
{
int var;
pthread_mutex_lock(&LOCK_guardian);
lock();
var= stopped;
pthread_mutex_unlock(&LOCK_guardian);
unlock();
return var;
}
/*
Initialize the list of guarded instances: loop through the Instance_map and
add all of the instances, which don't have 'nonguarded' option specified.
SYNOPSIS
Guardian::init()
/**
Wake up Guardian thread.
NOTE: The operation should be invoked with the following locks acquired:
- Guardian;
- Instance_map;
RETURN
0 - ok
1 - error occurred
MT-NOTE: though usually the mutex associated with condition variable should
be acquired before signalling the variable, here this is not needed.
Signalling under locked mutex is used to avoid lost signals. In the current
logic however locking mutex does not guarantee that the signal will not be
lost.
*/
int Guardian::init()
void Guardian::ping()
{
Instance *instance;
Instance_map::Iterator iterator(instance_map);
/* clear the list of guarded instances */
free_root(&alloc, MYF(0));
init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0);
guarded_instances= NULL;
while ((instance= iterator.next()))
{
if (instance->options.nonguarded)
continue;
if (guard(instance, TRUE)) /* do not lock guardian */
return 1;
}
return 0;
pthread_cond_signal(&COND_guardian);
}
/*
Add instance to the Guardian list
/**
Prepare list of instances.
SYNOPSIS
guard()
instance the instance to be guarded
nolock whether we prefer do not lock Guardian here,
but use external locking instead
DESCRIPTION
The instance is added to the guarded instances list. Usually guard() is
called after we start an instance.
init()
RETURN
0 - ok
1 - error occurred
MT-NOTE: Instance Map must be locked before calling the operation.
*/
int Guardian::guard(Instance *instance, bool nolock)
void Guardian::init()
{
LIST *node;
GUARD_NODE *content;
node= (LIST *) alloc_root(&alloc, sizeof(LIST));
content= (GUARD_NODE *) alloc_root(&alloc, sizeof(GUARD_NODE));
if ((!(node)) || (!(content)))
return 1;
/* we store the pointers to instances from the instance_map's MEM_ROOT */
content->instance= instance;
content->restart_counter= 0;
content->crash_moment= 0;
content->state= NOT_STARTED;
node->data= (void*) content;
if (nolock)
guarded_instances= list_add(guarded_instances, node);
else
{
pthread_mutex_lock(&LOCK_guardian);
guarded_instances= list_add(guarded_instances, node);
pthread_mutex_unlock(&LOCK_guardian);
}
return 0;
}
/*
TODO: perhaps it would make sense to create a pool of the LIST nodeents
and give them upon request. Now we are loosing a bit of memory when
guarded instance was stopped and then restarted (since we cannot free just
a piece of the MEM_ROOT).
*/
int Guardian::stop_guard(Instance *instance)
{
LIST *node;
pthread_mutex_lock(&LOCK_guardian);
node= find_instance_node(instance);
Instance *instance;
Instance_map::Iterator iterator(instance_map);
if (node != NULL)
guarded_instances= list_delete(guarded_instances, node);
while ((instance= iterator.next()))
{
instance->lock();
pthread_mutex_unlock(&LOCK_guardian);
instance->reset_stat();
instance->set_state(Instance::NOT_STARTED);
/* if there is nothing to delete it is also fine */
return 0;
instance->unlock();
}
}
/*
/**
An internal method which is called at shutdown to unregister instances and
attempt to stop them if requested.
......@@ -409,86 +399,71 @@ int Guardian::stop_guard(Instance *instance)
accordingly.
NOTE
Guardian object should be locked by the calling function.
Guardian object should be locked by the caller.
RETURN
0 - ok
1 - error occurred
*/
int Guardian::stop_instances()
void Guardian::stop_instances()
{
LIST *node;
node= guarded_instances;
while (node != NULL)
Instance_map::Iterator instances_it(instance_map);
Instance *instance;
instance_map->lock();
while ((instance= instances_it.next()))
{
GUARD_NODE *current_node= (GUARD_NODE *) node->data;
instance->lock();
if (!instance->is_guarded() ||
instance->get_state() == Instance::STOPPED)
{
instance->unlock();
continue;
}
/*
If instance is running or was running (and now probably hanging),
request stop.
*/
if (current_node->instance->is_mysqld_running() ||
(current_node->state == STARTED))
if (instance->is_mysqld_running() ||
instance->get_state() == Instance::STARTED)
{
current_node->state= STOPPING;
current_node->last_checked= time(NULL);
instance->set_state(Instance::STOPPING);
instance->last_checked= time(NULL);
}
else
/* otherwise remove it from the list */
guarded_instances= list_delete(guarded_instances, node);
/* But try to kill it anyway. Just in case */
current_node->instance->kill_mysqld(SIGTERM);
node= node->next;
{
/* Otherwise mark it as STOPPED. */
instance->set_state(Instance::STOPPED);
}
/* Request mysqld to stop. */
instance->kill_mysqld(SIGTERM);
instance->unlock();
}
return 0;
instance_map->unlock();
}
/**
Lock Guardian.
*/
void Guardian::lock()
{
pthread_mutex_lock(&LOCK_guardian);
}
/**
Unlock Guardian.
*/
void Guardian::unlock()
{
pthread_mutex_unlock(&LOCK_guardian);
}
LIST *Guardian::find_instance_node(Instance *instance)
{
LIST *node= guarded_instances;
while (node != NULL)
{
/*
We compare only pointers, as we always use pointers from the
instance_map's MEM_ROOT.
*/
if (((GUARD_NODE *) node->data)->instance == instance)
return node;
node= node->next;
}
return NULL;
}
bool Guardian::is_active(Instance *instance)
{
bool guarded;
lock();
guarded= find_instance_node(instance) != NULL;
/* is_running() can take a long time, so let's unlock mutex first. */
unlock();
if (guarded)
return true;
return instance->is_mysqld_running();
}
......@@ -17,10 +17,12 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include "thread_registry.h"
#include <my_global.h>
#include <my_sys.h>
#include <my_list.h>
#include "thread_registry.h"
#if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE)
#pragma interface
#endif
......@@ -28,7 +30,6 @@
class Instance;
class Instance_map;
class Thread_registry;
struct GUARD_NODE;
/**
The guardian thread is responsible for monitoring and restarting of guarded
......@@ -38,97 +39,73 @@ struct GUARD_NODE;
class Guardian: public Thread
{
public:
/* states of an instance */
enum enum_instance_state { NOT_STARTED= 1, STARTING, STARTED, JUST_CRASHED,
CRASHED, CRASHED_AND_ABANDONED, STOPPING };
/*
The Guardian list node structure. Guardian utilizes it to store
guarded instances plus some additional info.
*/
Guardian(Thread_registry *thread_registry_arg,
Instance_map *instance_map_arg);
~Guardian();
struct GUARD_NODE
{
Instance *instance;
/* state of an instance (i.e. STARTED, CRASHED, etc.) */
enum_instance_state state;
/* the amount of attemts to restart instance (cleaned up at success) */
int restart_counter;
/* triggered at a crash */
time_t crash_moment;
/* General time field. Used to provide timeouts (at shutdown and restart) */
time_t last_checked;
};
/* Return client state name. */
static const char *get_instance_state_name(enum_instance_state state);
void init();
Guardian(Thread_registry *thread_registry_arg,
Instance_map *instance_map_arg,
uint monitoring_interval_arg);
virtual ~Guardian();
/* Initialize or refresh the list of guarded instances */
int init();
/* Request guardian shutdown. Stop instances if needed */
public:
void request_shutdown();
/* Start instance protection */
int guard(Instance *instance, bool nolock= FALSE);
/* Stop instance protection */
int stop_guard(Instance *instance);
/* Returns TRUE if guardian thread is stopped */
int is_stopped();
bool is_stopped();
void lock();
void unlock();
/*
Return an internal list node for the given instance if the instance is
managed by Guardian. Otherwise, return NULL.
void ping();
MT-NOTE: must be called under acquired lock.
*/
LIST *find_instance_node(Instance *instance);
protected:
virtual void run();
private:
void stop_instances();
/* The operation is used to check if the instance is active or not. */
bool is_active(Instance *instance);
void process_instance(Instance *instance);
private:
/*
Return state of the given instance list node. The pointer must specify
a valid list node.
LOCK_guardian protectes the members in this section:
- shutdown_requested;
- stopped;
Also, it is used for COND_guardian.
*/
inline enum_instance_state get_instance_state(LIST *instance_node);
protected:
/* Main funtion of the thread */
virtual void run();
pthread_mutex_t LOCK_guardian;
public:
/*
Guardian's main loop waits on this condition. So, it should be signalled
each time, when instance state has been changed and we want Guardian to
wake up.
TODO: Change this to having data-scoped conditions, i.e. conditions,
which indicate that some data has been changed.
*/
pthread_cond_t COND_guardian;
private:
/* Prepares Guardian shutdown. Stops instances is needed */
int stop_instances();
/* check instance state and act accordingly */
void process_instance(Instance *instance, GUARD_NODE *current_node,
LIST **guarded_instances, LIST *elem);
/*
This variable is set to TRUE, when Manager thread is shutting down.
The flag is used by Guardian thread to understand that it's time to
finish.
*/
bool shutdown_requested;
/*
This flag is set to TRUE on shutdown by Guardian thread, when all guarded
mysqlds are stopped.
int stopped;
The flag is used in the Manager thread to wait for Guardian to stop all
mysqlds.
*/
bool stopped;
private:
pthread_mutex_t LOCK_guardian;
Thread_info thread_info;
int monitoring_interval;
Thread_registry *thread_registry;
Instance_map *instance_map;
LIST *guarded_instances;
MEM_ROOT alloc;
/* this variable is set to TRUE when we want to stop Guardian thread */
bool shutdown_requested;
};
inline Guardian::enum_instance_state
Guardian::get_instance_state(LIST *instance_node)
{
return ((GUARD_NODE *) instance_node->data)->state;
}
private:
Guardian(const Guardian &);
Guardian&operator =(const Guardian &);
};
#endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_GUARDIAN_H */
......@@ -36,7 +36,9 @@
#include "thread_registry.h"
#include "instance_map.h"
/* {{{ Platform-specific functions. */
/*************************************************************************
{{{ Platform-specific functions.
*************************************************************************/
#ifndef __WIN__
typedef pid_t My_process_info;
......@@ -44,34 +46,6 @@ typedef pid_t My_process_info;
typedef PROCESS_INFORMATION My_process_info;
#endif
/*
Proxy thread is a simple way to avoid all pitfalls of the threads
implementation in the OS (e.g. LinuxThreads). With such a thread we
don't have to process SIGCHLD, which is a tricky business if we want
to do it in a portable way.
*/
class Instance_monitor: public Thread
{
public:
Instance_monitor(Instance *instance_arg) :instance(instance_arg) {}
protected:
virtual void run();
void start_and_monitor_instance(Instance_options *old_instance_options,
Instance_map *instance_map,
Thread_registry *thread_registry);
private:
Instance *instance;
};
void Instance_monitor::run()
{
start_and_monitor_instance(&instance->options,
Manager::get_instance_map(),
Manager::get_thread_registry());
delete this;
}
/*
Wait for an instance
......@@ -285,113 +259,149 @@ int kill(pid_t pid, int signum)
}
#endif
/* }}} */
/*************************************************************************
}}}
*************************************************************************/
/* {{{ Static constants. */
/*************************************************************************
{{{ Static constants.
*************************************************************************/
const LEX_STRING
Instance::DFLT_INSTANCE_NAME= { C_STRING_WITH_LEN("mysqld") };
/* }}} */
/*************************************************************************
}}}
*************************************************************************/
/*
Fork child, exec an instance and monitor it.
/*************************************************************************
{{{ Instance Monitor thread.
*************************************************************************/
SYNOPSIS
start_and_monitor_instance()
old_instance_options Pointer to the options of the instance to be
launched. This info is likely to become obsolete
when function returns from wait_process()
instance_map Pointer to the instance_map. We use it to protect
the instance from deletion, while we are working
with it.
/**
Proxy thread is a simple way to avoid all pitfalls of the threads
implementation in the OS (e.g. LinuxThreads). With such a thread we
don't have to process SIGCHLD, which is a tricky business if we want
to do it in a portable way.
DESCRIPTION
Fork a child, then exec and monitor it. When the child is dead,
find appropriate instance (for this purpose we save its name),
set appropriate flags and wake all threads waiting for instance
to stop.
NOTE
A separate thread for starting/monitoring instance is a simple way
to avoid all pitfalls of the threads implementation in the OS (e.g.
LinuxThreads). For one, with such a thread we don't have to process
SIGCHLD, which is a tricky business if we want to do it in a
portable way.
Instance Monitor Thread forks a child process, execs mysqld and waits for
the child to die.
RETURN
Function returns no value
Instance Monitor assumes that the monitoring instance will not be dropped.
This is guaranteed by having flag monitoring_thread_active and
Instance::is_active() operation.
*/
void
Instance_monitor::
start_and_monitor_instance(Instance_options *old_instance_options,
Instance_map *instance_map,
Thread_registry *thread_registry)
class Instance_monitor: public Thread
{
Instance_name instance_name(&old_instance_options->instance_name);
Instance *current_instance;
My_process_info process_info;
Thread_info thread_info;
public:
Instance_monitor(Instance *instance_arg) :instance(instance_arg) {}
protected:
virtual void run();
void start_and_monitor_instance();
private:
Instance *instance;
};
void Instance_monitor::run()
{
start_and_monitor_instance();
delete this;
}
void Instance_monitor::start_and_monitor_instance()
{
Thread_registry *thread_registry= Manager::get_thread_registry();
Guardian *guardian= Manager::get_guardian();
My_process_info mysqld_process_info;
Thread_info monitor_thread_info;
log_info("Instance '%s': Monitor: started.",
(const char *) instance->get_name()->str);
if (!old_instance_options->nonguarded)
{
/*
Register thread in Thread_registry to wait for it to stop on shutdown
only if instance is guarded. If instance is guarded, the thread will not
finish, because nonguarded instances are not stopped on shutdown.
*/
thread_registry->register_thread(&thread_info, FALSE);
}
/*
Lock instance map to guarantee that no instances are deleted during
strmake() and execv() calls.
For guarded instance register the thread in Thread_registry to wait for
the thread to stop on shutdown (nonguarded instances are not stopped on
shutdown, so the thread will no finish).
*/
instance_map->lock();
/*
Save the instance name in the case if Instance object we
are using is destroyed. (E.g. by "FLUSH INSTANCES")
*/
if (instance->is_guarded())
{
thread_registry->register_thread(&monitor_thread_info, FALSE);
}
/* Starting mysqld. */
log_info("Instance '%s': Monitor: starting mysqld...",
(const char *) instance->get_name()->str);
if (start_process(old_instance_options, &process_info))
if (start_process(&instance->options, &mysqld_process_info))
{
instance_map->unlock();
return; /* error is logged */
instance->lock();
instance->monitoring_thread_active= FALSE;
instance->unlock();
return;
}
/* allow users to delete instances */
instance_map->unlock();
/* Waiting for mysqld to die. */
log_info("Instance '%s': Monitor: waiting for mysqld to stop...",
(const char *) instance->get_name()->str);
wait_process(&process_info); /* Don't check for return value. */
wait_process(&mysqld_process_info); /* Don't check for return value. */
instance_map->lock();
log_info("Instance '%s': Monitor: mysqld stopped.",
(const char *) instance->get_name()->str);
current_instance= instance_map->find(instance_name.get_str());
/* Update instance status. */
if (current_instance)
current_instance->set_crash_flag_n_wake_all();
instance->lock();
instance_map->unlock();
if (instance->is_guarded())
thread_registry->unregister_thread(&monitor_thread_info);
if (!old_instance_options->nonguarded)
thread_registry->unregister_thread(&thread_info);
instance->crashed= TRUE;
instance->monitoring_thread_active= FALSE;
log_info("Instance '%s': Monitor: finished.",
(const char *) instance->get_name()->str);
instance->unlock();
/* Wake up guardian. */
guardian->ping();
}
/**************************************************************************
}}}
**************************************************************************/
/**************************************************************************
{{{ Static operations.
**************************************************************************/
/**
The operation is intended to check whether string is a well-formed
instance name or not.
SYNOPSIS
is_name_valid()
name string to check
RETURN
TRUE string is a valid instance name
FALSE string is not a valid instance name
TODO: Move to Instance_name class: Instance_name::is_valid().
*/
bool Instance::is_name_valid(const LEX_STRING *name)
{
......@@ -405,21 +415,83 @@ bool Instance::is_name_valid(const LEX_STRING *name)
}
/**
The operation is intended to check if the given instance name is
mysqld-compatible or not.
SYNOPSIS
is_mysqld_compatible_name()
name name to check
RETURN
TRUE name is mysqld-compatible
FALSE otherwise
TODO: Move to Instance_name class: Instance_name::is_mysqld_compatible().
*/
bool Instance::is_mysqld_compatible_name(const LEX_STRING *name)
{
return strcmp(name->str, DFLT_INSTANCE_NAME.str) == 0;
}
/**
Return client state name. Must not be used outside the class.
Use Instance::get_state_name() instead.
*/
const char * Instance::get_instance_state_name(enum_instance_state state)
{
switch (state) {
case STOPPED:
return "offline";
case NOT_STARTED:
return "not started";
case STARTING:
return "starting";
case STARTED:
return "online";
case JUST_CRASHED:
return "failed";
case CRASHED:
return "crashed";
/* {{{ Constructor & destructor */
case CRASHED_AND_ABANDONED:
return "abandoned";
case STOPPING:
return "stopping";
}
return NULL; /* just to ignore compiler warning. */
}
/**************************************************************************
}}}
**************************************************************************/
/**************************************************************************
{{{ Initialization & deinitialization.
**************************************************************************/
Instance::Instance()
:crashed(FALSE),
configured(FALSE)
:monitoring_thread_active(FALSE),
crashed(FALSE),
configured(FALSE),
/* mysqld_compatible is initialized in init() */
state(NOT_STARTED),
restart_counter(0),
crash_moment(0),
last_checked(0)
{
pthread_mutex_init(&LOCK_instance, 0);
pthread_cond_init(&COND_instance_stopped, 0);
}
......@@ -427,13 +499,11 @@ Instance::~Instance()
{
log_info("Instance '%s': destroying...", (const char *) get_name()->str);
pthread_cond_destroy(&COND_instance_stopped);
pthread_mutex_destroy(&LOCK_instance);
}
/* }}} */
/*
/**
Initialize instance options.
SYNOPSIS
......@@ -453,7 +523,7 @@ bool Instance::init(const LEX_STRING *name_arg)
}
/*
/**
Complete instance options initialization.
SYNOPSIS
......@@ -474,7 +544,47 @@ bool Instance::complete_initialization()
*/
}
/*
/**************************************************************************
}}}
**************************************************************************/
/**************************************************************************
{{{ Instance: public interface implementation.
**************************************************************************/
/**
Determine if there is some activity with the instance.
SYNOPSIS
is_active()
DESCRIPTION
An instance is active if one of the following conditions is true:
- Instance-monitoring thread is running;
- Instance is guarded and its state is other than STOPPED;
- Corresponding mysqld-server accepts connections.
MT-NOTE: instance must be locked before calling the operation.
RETURN
TRUE - instance is active
FALSE - otherwise.
*/
bool Instance::is_active()
{
if (monitoring_thread_active)
return TRUE;
if (is_guarded() && get_state() != STOPPED)
return TRUE;
return is_mysqld_running();
}
/**
Determine if mysqld is accepting connections.
SYNOPSIS
......@@ -484,7 +594,7 @@ bool Instance::complete_initialization()
Try to connect to mysqld with fake login/password to check whether it is
accepting connections or not.
MT-NOTE: this operation must be called under acquired LOCK_instance.
MT-NOTE: instance must be locked before calling the operation.
RETURN
TRUE - mysqld is alive and accept connections
......@@ -508,8 +618,6 @@ bool Instance::is_mysqld_running()
if (!port && !options.mysqld_socket)
port= SERVER_DEFAULT_PORT;
pthread_mutex_lock(&LOCK_instance);
mysql_init(&mysql);
/* try to connect to a server with a fake username/password pair */
if (mysql_real_connect(&mysql, LOCAL_HOST, username,
......@@ -523,7 +631,6 @@ bool Instance::is_mysqld_running()
*/
log_error("Instance '%s': was able to log into mysqld.",
(const char *) get_name()->str);
pthread_mutex_unlock(&LOCK_instance);
return_val= TRUE; /* server is alive */
}
else
......@@ -531,145 +638,145 @@ bool Instance::is_mysqld_running()
sizeof(access_denied_message) - 1));
mysql_close(&mysql);
pthread_mutex_unlock(&LOCK_instance);
return return_val;
}
/*
The method starts an instance.
/**
Start mysqld.
SYNOPSIS
start()
start_mysqld()
DESCRIPTION
Reset flags and start Instance Monitor thread, which will start mysqld.
MT-NOTE: instance must be locked before calling the operation.
RETURN
0 ok
ER_CANNOT_START_INSTANCE Cannot start instance
ER_INSTANCE_ALREADY_STARTED The instance on the specified port/socket
is already started
FALSE - ok
TRUE - could not start instance
*/
int Instance::start()
bool Instance::start_mysqld()
{
/* clear crash flag */
pthread_mutex_lock(&LOCK_instance);
crashed= FALSE;
pthread_mutex_unlock(&LOCK_instance);
Instance_monitor *instance_monitor;
/*
Prepare instance to start Instance Monitor thread.
if (configured && !is_mysqld_running())
{
Instance_monitor *instance_monitor;
remove_pid();
NOTE: It's important to set these actions here in order to avoid
race conditions -- these actions must be done under acquired lock on
Instance.
*/
instance_monitor= new Instance_monitor(this);
crashed= FALSE;
monitoring_thread_active= TRUE;
if (instance_monitor == NULL || instance_monitor->start(Thread::DETACHED))
{
delete instance_monitor;
log_error("Instance::start(): failed to create the monitoring thread"
" to start an instance");
return ER_CANNOT_START_INSTANCE;
}
/* The monitoring thread will delete itself when it's finished. */
remove_pid();
return 0;
}
/* Create and start the Instance Monitor thread. */
/* The instance is started already or misconfigured. */
return configured ? ER_INSTANCE_ALREADY_STARTED : ER_INSTANCE_MISCONFIGURED;
}
instance_monitor= new Instance_monitor(this);
/*
The method sets the crash flag and wakes all waiters on
COND_instance_stopped and COND_guardian
if (instance_monitor == NULL || instance_monitor->start(Thread::DETACHED))
{
delete instance_monitor;
monitoring_thread_active= FALSE;
SYNOPSIS
set_crash_flag_n_wake_all()
log_error("Instance '%s': can not create instance monitor thread.",
(const char *) get_name()->str);
DESCRIPTION
The method is called when an instance is crashed or terminated.
In the former case it might indicate that guardian probably should
restart it.
return TRUE;
}
RETURN
Function returns no value
*/
++restart_counter;
void Instance::set_crash_flag_n_wake_all()
{
/* set instance state to crashed */
pthread_mutex_lock(&LOCK_instance);
crashed= TRUE;
pthread_mutex_unlock(&LOCK_instance);
/* The Instance Monitor thread will delete itself when it's finished. */
/*
Wake connection threads waiting for an instance to stop. This
is needed if a user issued command to stop an instance via
mysql connection. This is not the case if Guardian stop the thread.
*/
pthread_cond_signal(&COND_instance_stopped);
/* wake guardian */
pthread_cond_signal(&Manager::get_guardian()->COND_guardian);
return FALSE;
}
/*
Stop an instance.
/**
Stop mysqld.
SYNOPSIS
stop()
stop_mysqld()
RETURN:
0 ok
ER_INSTANCE_IS_NOT_STARTED Looks like the instance it is not started
ER_STOP_INSTANCE mysql_shutdown reported an error
*/
DESCRIPTION
Try to stop mysqld gracefully. Otherwise kill it with SIGKILL.
int Instance::stop()
{
struct timespec timeout;
uint waitchild= (uint) DEFAULT_SHUTDOWN_DELAY;
MT-NOTE: instance must be locked before calling the operation.
if (is_mysqld_running())
{
waitchild= options.get_shutdown_delay();
RETURN
FALSE - ok
TRUE - could not stop the instance
*/
kill_mysqld(SIGTERM);
/* sleep on condition to wait for SIGCHLD */
bool Instance::stop_mysqld()
{
log_info("Instance '%s': stopping mysqld...",
(const char *) get_name()->str);
timeout.tv_sec= time(NULL) + waitchild;
timeout.tv_nsec= 0;
if (pthread_mutex_lock(&LOCK_instance))
return ER_STOP_INSTANCE;
kill_mysqld(SIGTERM);
while (options.load_pid() != 0) /* while server isn't stopped */
{
int status;
if (!wait_for_stop())
{
log_info("Instance '%s': mysqld stopped gracefully.",
(const char *) get_name()->str);
return FALSE;
}
status= pthread_cond_timedwait(&COND_instance_stopped,
&LOCK_instance,
&timeout);
if (status == ETIMEDOUT || status == ETIME)
break;
}
log_info("Instance '%s': mysqld failed to stop gracefully within %d seconds.",
(const char *) get_name()->str,
(int) options.get_shutdown_delay());
pthread_mutex_unlock(&LOCK_instance);
log_info("Instance'%s': killing mysqld...",
(const char *) get_name()->str);
kill_mysqld(SIGKILL);
kill_mysqld(SIGKILL);
return 0;
if (!wait_for_stop())
{
log_info("Instance '%s': mysqld has been killed.",
(const char *) get_name()->str);
return FALSE;
}
return ER_INSTANCE_IS_NOT_STARTED;
log_info("Instance '%s': can not kill mysqld within %d seconds.",
(const char *) get_name()->str,
(int) options.get_shutdown_delay());
return TRUE;
}
/*
/**
Send signal to mysqld.
SYNOPSIS
kill_mysqld()
DESCRIPTION
Load pid from the pid file and send the given signal to that process.
If the signal is SIGKILL, remove the pid file after sending the signal.
MT-NOTE: instance must be locked before calling the operation.
TODO
This too low-level and OS-specific operation for public interface.
Also, it has some implicit behaviour for SIGKILL signal. Probably, we
should have the following public operations instead:
- start_mysqld() -- as is;
- stop_mysqld -- request mysqld to shutdown gracefully (send SIGTERM);
don't wait for complete shutdown;
- wait_for_stop() (or join_mysqld()) -- wait for mysqld to stop within
time interval;
- kill_mysqld() -- request to terminate mysqld; don't wait for
completion.
These operations should also be used in Guardian to manage instances.
*/
void Instance::kill_mysqld(int signum)
......@@ -707,27 +814,91 @@ void Instance::kill_mysqld(int signum)
}
}
/*
Return crashed flag.
SYNOPSIS
is_crashed()
RETURN
TRUE - mysqld crashed
FALSE - mysqld hasn't crashed yet
/**
Lock instance.
*/
bool Instance::is_crashed()
void Instance::lock()
{
bool val;
pthread_mutex_lock(&LOCK_instance);
val= crashed;
}
/**
Unlock instance.
*/
void Instance::unlock()
{
pthread_mutex_unlock(&LOCK_instance);
return val;
}
/*
/**
Return instance state name.
SYNOPSIS
get_state_name()
DESCRIPTION
The operation returns user-friendly state name. The operation can be
used both for guarded and non-guarded instances.
MT-NOTE: instance must be locked before calling the operation.
TODO: Replace with the static get_state_name(state_code) function.
*/
const char *Instance::get_state_name()
{
if (!is_configured())
return "misconfigured";
if (is_guarded())
{
/* The instance is managed by Guardian: we can report precise state. */
return get_instance_state_name(get_state());
}
/* The instance is not managed by Guardian: we can report status only. */
return is_active() ? "online" : "offline";
}
/**
Reset statistics.
SYNOPSIS
reset_stat()
DESCRIPTION
The operation resets statistics used for guarding the instance.
MT-NOTE: instance must be locked before calling the operation.
TODO: Make private.
*/
void Instance::reset_stat()
{
restart_counter= 0;
crash_moment= 0;
last_checked= 0;
}
/**************************************************************************
}}}
**************************************************************************/
/**************************************************************************
{{{ Instance: implementation of private operations.
**************************************************************************/
/**
Remove pid file.
*/
......@@ -744,3 +915,36 @@ void Instance::remove_pid()
(const char *) options.instance_name.str);
}
}
/**
Wait for mysqld to stop within shutdown interval.
*/
bool Instance::wait_for_stop()
{
int start_time= time(NULL);
int finish_time= start_time + options.get_shutdown_delay();
log_info("Instance '%s': waiting for mysqld to stop "
"(timeout: %d seconds)...",
(const char *) get_name()->str,
(int) options.get_shutdown_delay());
while (true)
{
if (options.load_pid() == 0 && !is_mysqld_running())
return FALSE;
if (time(NULL) >= finish_time)
return TRUE;
/* Sleep for 0.3 sec and check again. */
my_sleep(300000);
}
}
/**************************************************************************
}}}
**************************************************************************/
......@@ -30,7 +30,7 @@ class Instance_map;
class Thread_registry;
/*
/**
Instance_name -- the class represents instance name -- a string of length
less than MAX_INSTANCE_NAME_SIZE.
......@@ -68,72 +68,127 @@ private:
class Instance
{
public:
/*
The following two constants defines name of the default mysqld-instance
("mysqld").
/* States of an instance. */
enum enum_instance_state
{
STOPPED,
NOT_STARTED,
STARTING,
STARTED,
JUST_CRASHED,
CRASHED,
CRASHED_AND_ABANDONED,
STOPPING
};
public:
/**
The constant defines name of the default mysqld-instance ("mysqld").
*/
static const LEX_STRING DFLT_INSTANCE_NAME;
public:
/*
The operation is intended to check whether string is a well-formed
instance name or not.
*/
static bool is_name_valid(const LEX_STRING *name);
/*
The operation is intended to check if the given instance name is
mysqld-compatible or not.
*/
static bool is_mysqld_compatible_name(const LEX_STRING *name);
public:
Instance();
~Instance();
bool init(const LEX_STRING *name_arg);
bool complete_initialization();
public:
bool is_active();
bool is_mysqld_running();
int start();
int stop();
/* send a signal to the instance */
bool start_mysqld();
bool stop_mysqld();
void kill_mysqld(int signo);
bool is_crashed();
void set_crash_flag_n_wake_all();
/*
void lock();
void unlock();
const char *get_state_name();
void reset_stat();
public:
/**
The operation is intended to check if the instance is mysqld-compatible
or not.
*/
inline bool is_mysqld_compatible() const;
/*
/**
The operation is intended to check if the instance is configured properly
or not. Misconfigured instances are not managed.
*/
inline bool is_configured() const;
/**
The operation returns TRUE if the instance is guarded and FALSE otherwise.
*/
inline bool is_guarded() const;
/**
The operation returns name of the instance.
*/
inline const LEX_STRING *get_name() const;
/**
The operation returns the current state of the instance.
NOTE: At the moment should be used only for guarded instances.
*/
inline enum_instance_state get_state() const;
/**
The operation changes the state of the instance.
NOTE: At the moment should be used only for guarded instances.
TODO: Make private.
*/
inline void set_state(enum_instance_state new_state);
/**
The operation returns crashed flag.
*/
inline bool is_crashed();
public:
enum { DEFAULT_SHUTDOWN_DELAY= 35 };
/**
This attributes contains instance options.
TODO: Make private.
*/
Instance_options options;
private:
/* This attributes is a flag, specifies if the instance has been crashed. */
/**
monitoring_thread_active is TRUE if there is a thread that monitors the
corresponding mysqld-process.
*/
bool monitoring_thread_active;
/**
crashed is TRUE when corresponding mysqld-process has been died after
start.
*/
bool crashed;
/*
This attribute specifies if the instance is configured properly or not.
/**
configured is TRUE when the instance is configured and FALSE otherwise.
Misconfigured instances are not managed.
*/
bool configured;
/*
This attribute specifies whether the instance is mysqld-compatible or not.
Mysqld-compatible instances can contain only mysqld-specific options.
At the moment an instance is mysqld-compatible if its name is "mysqld".
mysqld_compatible specifies whether the instance is mysqld-compatible
or not. Mysqld-compatible instances can contain only mysqld-specific
options. At the moment an instance is mysqld-compatible if its name is
"mysqld".
The idea is that [mysqld] section should contain only mysqld-specific
options (no Instance Manager-specific options) to be readable by mysqld
......@@ -142,18 +197,36 @@ private:
bool mysqld_compatible;
/*
Mutex protecting the instance. Currently we use it to avoid the
double start of the instance. This happens when the instance is starting
and we issue the start command once more.
Mutex protecting the instance.
*/
pthread_mutex_t LOCK_instance;
/*
This condition variable is used to wake threads waiting for instance to
stop in Instance::stop()
*/
pthread_cond_t COND_instance_stopped;
void remove_pid();
private:
/* Guarded-instance attributes. */
/* state of an instance (i.e. STARTED, CRASHED, etc.) */
enum_instance_state state;
public:
/* the amount of attemts to restart instance (cleaned up at success) */
int restart_counter;
/* triggered at a crash */
time_t crash_moment;
/* General time field. Used to provide timeouts (at shutdown and restart) */
time_t last_checked;
private:
static const char *get_instance_state_name(enum_instance_state state);
private:
void remove_pid();
bool wait_for_stop();
private:
friend class Instance_monitor;
};
......@@ -169,9 +242,33 @@ inline bool Instance::is_configured() const
}
inline bool Instance::is_guarded() const
{
return !options.nonguarded;
}
inline const LEX_STRING *Instance::get_name() const
{
return &options.instance_name;
}
inline Instance::enum_instance_state Instance::get_state() const
{
return state;
}
inline void Instance::set_state(enum_instance_state new_state)
{
state= new_state;
}
inline bool Instance::is_crashed()
{
return crashed;
}
#endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_INSTANCE_H */
......@@ -25,26 +25,18 @@
#include <mysql_com.h>
#include "buffer.h"
#include "guardian.h"
#include "instance.h"
#include "log.h"
#include "manager.h"
#include "mysqld_error.h"
#include "mysql_manager_error.h"
#include "options.h"
#include "priv.h"
/*
Note: As we are going to suppost different types of connections,
we shouldn't have connection-specific functions. To avoid it we could
put such functions to the Command-derived class instead.
The command could be easily constructed for a specific connection if
we would provide a special factory for each connection.
*/
C_MODE_START
/* Procedure needed for HASH initialization */
/**
HASH-routines: get key of instance for storing in hash.
*/
static byte* get_instance_key(const byte* u, uint* len,
my_bool __attribute__((unused)) t)
......@@ -54,14 +46,18 @@ static byte* get_instance_key(const byte* u, uint* len,
return (byte *) instance->options.instance_name.str;
}
/**
HASH-routines: cleanup handler.
*/
static void delete_instance(void *u)
{
Instance *instance= (Instance *) u;
delete instance;
}
/*
The option handler to pass to the process_default_option_files finction.
/**
The option handler to pass to the process_default_option_files function.
SYNOPSIS
process_option()
......@@ -96,8 +92,8 @@ static int process_option(void *ctx, const char *group, const char *option)
C_MODE_END
/*
Parse option string.
/**
Parse option string.
SYNOPSIS
parse_option()
......@@ -137,7 +133,7 @@ static void parse_option(const char *option_str,
}
/*
/**
Process one option from the configuration file.
SYNOPSIS
......@@ -151,6 +147,10 @@ static void parse_option(const char *option_str,
process_option(). The caller ensures proper locking
of the instance map object.
*/
/*
Process a given option and assign it to appropricate instance. This is
required for the option handler, passed to my_search_option_files().
*/
int Instance_map::process_one_option(const LEX_STRING *group,
const char *option)
......@@ -213,92 +213,97 @@ int Instance_map::process_one_option(const LEX_STRING *group,
}
/**
Instance_map constructor.
*/
Instance_map::Instance_map()
{
pthread_mutex_init(&LOCK_instance_map, 0);
}
/**
Initialize Instance_map internals.
*/
bool Instance_map::init()
{
return hash_init(&hash, default_charset_info, START_HASH_SIZE, 0, 0,
get_instance_key, delete_instance, 0);
}
/**
Reset Instance_map data.
*/
bool Instance_map::reset()
{
hash_free(&hash);
return init();
}
/**
Instance_map destructor.
*/
Instance_map::~Instance_map()
{
pthread_mutex_lock(&LOCK_instance_map);
lock();
/*
NOTE: it's necessary to synchronize on each instance before removal,
because Instance-monitoring thread can be still alive an hold the mutex
(because it is detached and we have no control over it).
*/
while (true)
{
Iterator it(this);
Instance *instance= it.next();
if (!instance)
break;
instance->lock();
instance->unlock();
remove_instance(instance);
}
hash_free(&hash);
pthread_mutex_unlock(&LOCK_instance_map);
unlock();
pthread_mutex_destroy(&LOCK_instance_map);
}
/**
Lock Instance_map.
*/
void Instance_map::lock()
{
pthread_mutex_lock(&LOCK_instance_map);
}
/**
Unlock Instance_map.
*/
void Instance_map::unlock()
{
pthread_mutex_unlock(&LOCK_instance_map);
}
/*
Re-read instance configuration file.
SYNOPSIS
Instance_map::flush_instances()
DESCRIPTION
This function will:
- clear the current list of instances. This removes both
running and stopped instances.
- load a new instance configuration from the file.
- pass on the new map to the guardian thread: it will start
all instances that are marked `guarded' and not yet started.
Note, as the check whether an instance is started is currently
very simple (returns TRUE if there is a MySQL server running
at the given port), this function has some peculiar
side-effects:
* if the port number of a running instance was changed, the
old instance is forgotten, even if it was running. The new
instance will be started at the new port.
* if the configuration was changed in a way that two
instances swapped their port numbers, the guardian thread
will not notice that and simply report that both instances
are configured successfully and running.
In order to avoid such side effects one should never call
FLUSH INSTANCES without prior stop of all running instances.
NOTE: The operation should be invoked with the following locks acquired:
- Guardian;
- Instance_map;
/**
Check if there is an active instance or not.
*/
int Instance_map::flush_instances()
{
int rc;
/*
Guardian thread relies on the instance map repository for guarding
instances. This is why refreshing instance map, we need (1) to stop
guardian (2) reload the instance map (3) reinitialize the guardian
with new instances.
*/
hash_free(&hash);
hash_init(&hash, default_charset_info, START_HASH_SIZE, 0, 0,
get_instance_key, delete_instance, 0);
rc= load();
/* don't init guardian if we failed to load instances */
if (!rc)
guardian->init(); // TODO: check error status.
return rc;
}
bool Instance_map::is_there_active_instance()
{
Instance *instance;
......@@ -306,29 +311,50 @@ bool Instance_map::is_there_active_instance()
while ((instance= iterator.next()))
{
if (guardian->find_instance_node(instance) != NULL ||
instance->is_mysqld_running())
{
bool active_instance_found;
instance->lock();
active_instance_found= instance->is_active();
instance->unlock();
if (active_instance_found)
return TRUE;
}
}
return FALSE;
}
/**
Add an instance into the internal hash.
MT-NOTE: Instance Map must be locked before calling the operation.
*/
int Instance_map::add_instance(Instance *instance)
{
return my_hash_insert(&hash, (byte *) instance);
}
/**
Remove instance from the internal hash.
MT-NOTE: Instance Map must be locked before calling the operation.
*/
int Instance_map::remove_instance(Instance *instance)
{
return hash_delete(&hash, (byte *) instance);
}
/**
Create a new instance and register it in the internal hash.
MT-NOTE: Instance Map must be locked before calling the operation.
*/
int Instance_map::create_instance(const LEX_STRING *instance_name,
const Named_value_arr *options)
{
......@@ -392,12 +418,22 @@ int Instance_map::create_instance(const LEX_STRING *instance_name,
}
/**
Return a pointer to the instance or NULL, if there is no such instance.
MT-NOTE: Instance Map must be locked before calling the operation.
*/
Instance * Instance_map::find(const LEX_STRING *name)
{
return (Instance *) hash_search(&hash, (byte *) name->str, name->length);
}
/**
Init instances command line arguments after all options have been loaded.
*/
bool Instance_map::complete_initialization()
{
bool mysqld_found;
......@@ -455,7 +491,10 @@ bool Instance_map::complete_initialization()
}
/* load options from config files and create appropriate instance structures */
/**
Load options from config files and create appropriate instance
structures.
*/
int Instance_map::load()
{
......@@ -505,8 +544,9 @@ int Instance_map::load()
}
/*--- Implementaton of the Instance map iterator class ---*/
/*************************************************************************
{{{ Instance_map::Iterator implementation.
*************************************************************************/
void Instance_map::Iterator::go_to_first()
{
......@@ -522,29 +562,12 @@ Instance *Instance_map::Iterator::next()
return NULL;
}
const char *Instance_map::get_instance_state_name(Instance *instance)
{
LIST *instance_node;
if (!instance->is_configured())
return "misconfigured";
if ((instance_node= guardian->find_instance_node(instance)) != NULL)
{
/* The instance is managed by Guardian: we can report precise state. */
return Guardian::get_instance_state_name(
guardian->get_instance_state(instance_node));
}
/* The instance is not managed by Guardian: we can report status only. */
return instance->is_mysqld_running() ? "online" : "offline";
}
/*************************************************************************
}}}
*************************************************************************/
/*
/**
Create a new configuration section for mysqld-instance in the config file.
SYNOPSIS
......
......@@ -37,14 +37,17 @@ extern int create_instance_in_file(const LEX_STRING *instance_name,
const Named_value_arr *options);
/*
/**
Instance_map - stores all existing instances
*/
class Instance_map
{
public:
/* Instance_map iterator */
/**
Instance_map iterator
*/
class Iterator
{
private:
......@@ -58,79 +61,43 @@ public:
void go_to_first();
Instance *next();
};
friend class Iterator;
public:
/*
Return a pointer to the instance or NULL, if there is no such instance.
MT-NOTE: must be called under acquired lock.
*/
Instance *find(const LEX_STRING *name);
/* Clear the configuration cache and reload the configuration file. */
int flush_instances();
/* The operation is used to check if there is an active instance or not. */
bool is_there_active_instance();
void lock();
void unlock();
bool init();
bool reset();
/*
Process a given option and assign it to appropricate instance. This is
required for the option handler, passed to my_search_option_files().
*/
int process_one_option(const LEX_STRING *group, const char *option);
int load();
/*
Add an instance into the internal hash.
int process_one_option(const LEX_STRING *group, const char *option);
MT-NOTE: the operation must be called under acquired lock.
*/
int add_instance(Instance *instance);
/*
Remove instance from the internal hash.
MT-NOTE: the operation must be called under acquired lock.
*/
int remove_instance(Instance *instance);
/*
Create a new instance and register it in the internal hash.
MT-NOTE: the operation must be called under acquired lock.
*/
int create_instance(const LEX_STRING *instance_name,
const Named_value_arr *options);
public:
Instance_map();
~Instance_map();
/*
Retrieve client state name of the given instance.
MT-NOTE: the options must be called under acquired locks of the following
objects:
- Instance_map;
- Guardian;
*/
const char *get_instance_state_name(Instance *instance);
public:
const char *mysqld_path;
Guardian *guardian;
private:
/* loads options from config files */
int load();
/* inits instances argv's after all options have been loaded */
bool complete_initialization();
private:
enum { START_HASH_SIZE = 16 };
pthread_mutex_t LOCK_instance_map;
HASH hash;
private:
friend class Iterator;
};
#endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_INSTANCE_MAP_H */
......@@ -46,7 +46,6 @@ public:
Instance_options();
~Instance_options();
/* fills in argv */
bool complete_initialization();
bool set_option(Named_value *option);
......
......@@ -37,6 +37,9 @@
#include "user_map.h"
/**********************************************************************
{{{ Platform-specific implementation.
**********************************************************************/
#ifndef __WIN__
void set_signals(sigset_t *mask)
......@@ -92,9 +95,13 @@ int my_sigwait(const sigset_t *set, int *sig)
#endif
/**********************************************************************
}}}
**********************************************************************/
/**********************************************************************
Implementation of checking the actual thread model.
{{{ Implementation of checking the actual thread model.
***********************************************************************/
namespace { /* no-indent */
......@@ -137,6 +144,10 @@ bool check_if_linux_threads(bool *linux_threads)
}
/**********************************************************************
}}}
***********************************************************************/
/**********************************************************************
Manager implementation
......@@ -152,25 +163,37 @@ bool Manager::linux_threads;
#endif // __WIN__
/**
Request shutdown of guardian and threads registered in Thread_registry.
SYNOPSIS
stop_all_threads()
*/
void Manager::stop_all_threads()
{
/*
Let guardian thread know that it should break it's processing cycle,
Let Guardian thread know that it should break it's processing cycle,
once it wakes up.
*/
p_guardian->request_shutdown();
/* wake guardian */
pthread_cond_signal(&p_guardian->COND_guardian);
/* stop all threads */
/* Stop all threads. */
p_thread_registry->deliver_shutdown();
}
/*
manager - entry point to the main instance manager process: start
listener thread, write pid file and enter into signal handling.
See also comments in mysqlmanager.cc to picture general Instance Manager
architecture.
/**
Main manager function.
SYNOPSIS
main()
DESCRIPTION
This is an entry point to the main instance manager process:
start listener thread, write pid file and enter into signal handling.
See also comments in mysqlmanager.cc to picture general Instance Manager
architecture.
TODO: how about returning error status.
*/
......@@ -194,22 +217,33 @@ int Manager::main()
(const char *) (linux_threads ? "LINUX threads" : "POSIX threads"));
#endif // __WIN__
Thread_registry thread_registry;
/*
All objects created in the manager() function live as long as
thread_registry lives, and thread_registry is alive until there are
working threads.
All objects created in the Manager object live as long as thread_registry
lives, and thread_registry is alive until there are working threads.
There are two main purposes of the Thread Registry:
1. Interrupt blocking I/O and signal condition variables in case of
shutdown;
2. Wait for detached threads before shutting down the main thread.
NOTE:
1. Handling shutdown can be done in more elegant manner by introducing
Event (or Condition) object with support of logical operations.
2. Using Thread Registry to wait for detached threads is definitely not
the best way, because when Thread Registry unregisters an thread, the
thread is still alive. Accurate way to wait for threads to stop is
not using detached threads and join all threads before shutdown.
*/
Thread_registry thread_registry;
User_map user_map;
Instance_map instance_map;
Guardian guardian(&thread_registry, &instance_map,
Options::Main::monitoring_interval);
Guardian guardian(&thread_registry, &instance_map);
Listener listener(&thread_registry, &user_map);
p_instance_map= &instance_map;
p_guardian= instance_map.guardian= &guardian;
p_guardian= &guardian;
p_thread_registry= &thread_registry;
p_user_map= &user_map;
......@@ -249,7 +283,7 @@ int Manager::main()
}
}
/* write Instance Manager pid file */
/* Write Instance Manager pid file. */
log_info("IM pid file: '%s'; PID: %d.",
(const char *) Options::Main::pid_file_name,
......@@ -290,6 +324,7 @@ int Manager::main()
permitted to process instances. And before flush_instances() has
completed, there are no instances to guard.
*/
if (guardian.start(Thread::DETACHED))
{
log_error("Can not start Guardian thread.");
......@@ -298,21 +333,11 @@ int Manager::main()
/* Load instances. */
if (Manager::flush_instances())
{
instance_map.guardian->lock();
instance_map.lock();
int flush_instances_status= instance_map.flush_instances();
instance_map.unlock();
instance_map.guardian->unlock();
if (flush_instances_status)
{
log_error("Can not init instances repository.");
stop_all_threads();
goto err;
}
log_error("Can not init instances repository.");
stop_all_threads();
goto err;
}
/* Initialize the Listener. */
......@@ -328,7 +353,8 @@ int Manager::main()
After the list of guarded instances have been initialized,
Guardian should start them.
*/
pthread_cond_signal(&guardian.COND_guardian);
guardian.ping();
/* Main loop. */
......@@ -381,7 +407,6 @@ int Manager::main()
if (!guardian.is_stopped())
{
guardian.request_shutdown();
pthread_cond_signal(&guardian.COND_guardian);
}
else
{
......@@ -406,3 +431,64 @@ err:
#endif
return rc;
}
/**
Re-read instance configuration file.
SYNOPSIS
flush_instances()
DESCRIPTION
This function will:
- clear the current list of instances. This removes both
running and stopped instances.
- load a new instance configuration from the file.
- pass on the new map to the guardian thread: it will start
all instances that are marked `guarded' and not yet started.
Note, as the check whether an instance is started is currently
very simple (returns TRUE if there is a MySQL server running
at the given port), this function has some peculiar
side-effects:
* if the port number of a running instance was changed, the
old instance is forgotten, even if it was running. The new
instance will be started at the new port.
* if the configuration was changed in a way that two
instances swapped their port numbers, the guardian thread
will not notice that and simply report that both instances
are configured successfully and running.
In order to avoid such side effects one should never call
FLUSH INSTANCES without prior stop of all running instances.
*/
bool Manager::flush_instances()
{
p_instance_map->lock();
if (p_instance_map->is_there_active_instance())
{
p_instance_map->unlock();
return TRUE;
}
if (p_instance_map->reset())
{
p_instance_map->unlock();
return TRUE;
}
if (p_instance_map->load())
{
p_instance_map->unlock();
return TRUE; /* Don't init guardian if we failed to load instances. */
}
get_guardian()->init(); /* TODO: check error status. */
get_guardian()->ping();
p_instance_map->unlock();
return FALSE;
}
......@@ -19,6 +19,7 @@
#if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE)
#pragma interface
#endif
#include <my_global.h>
class Guardian;
......@@ -30,8 +31,12 @@ class Manager
{
public:
static int main();
static bool flush_instances();
public:
/**
These methods return a non-zero value only for the duration
These methods return a non-NULL value only for the duration
of main().
*/
static Instance_map *get_instance_map() { return p_instance_map; }
......@@ -39,6 +44,7 @@ public:
static Thread_registry *get_thread_registry() { return p_thread_registry; }
static User_map *get_user_map() { return p_user_map; }
public:
#ifndef __WIN__
static bool is_linux_threads() { return linux_threads; }
#endif // __WIN__
......
......@@ -42,7 +42,7 @@ int User::init(const char *line)
if (name_end == 0 || name_end[1] != ':')
{
log_error("Invalid format (unmatched quote) of user line (%s).",
(const char *) line);
(const char *) line);
return 1;
}
password= name_end + 2;
......@@ -54,7 +54,7 @@ int User::init(const char *line)
if (name_end == 0)
{
log_error("Invalid format (no delimiter) of user line (%s).",
(const char *) line);
(const char *) line);
return 1;
}
password= name_end + 1;
......@@ -64,10 +64,10 @@ int User::init(const char *line)
if (user_length > USERNAME_LENGTH)
{
log_error("User name is too long (%d). Max length: %d. "
"User line: '%s'.",
(int) user_length,
(int) USERNAME_LENGTH,
(const char *) line);
"User line: '%s'.",
(int) user_length,
(int) USERNAME_LENGTH,
(const char *) line);
return 1;
}
......@@ -75,10 +75,10 @@ int User::init(const char *line)
if (password_length > SCRAMBLED_PASSWORD_CHAR_LENGTH)
{
log_error("Password is too long (%d). Max length: %d."
"User line: '%s'.",
(int) password_length,
(int) SCRAMBLED_PASSWORD_CHAR_LENGTH,
line);
"User line: '%s'.",
(int) password_length,
(int) SCRAMBLED_PASSWORD_CHAR_LENGTH,
(const char *) line);
return 1;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment