mtr_process.pl 29.5 KB
Newer Older
unknown's avatar
unknown committed
1 2 3 4 5 6
# -*- cperl -*-

# This is a library file used by the Perl version of mysql-test-run,
# and is part of the translation of the Bourne shell script with the
# same name.

unknown's avatar
unknown committed
7 8
use Socket;
use Errno;
unknown's avatar
unknown committed
9 10
use strict;

unknown's avatar
unknown committed
11
use POSIX 'WNOHANG';
unknown's avatar
unknown committed
12

13 14
# Forward declarations, so that the prototypes are known to calls that
# are compiled before the corresponding definition is seen.
sub mtr_run ($$$$$$;$);
sub mtr_run_test ($$$$$$;$);
sub mtr_spawn ($$$$$$;$);
sub mtr_check_stop_servers ($);
sub mtr_kill_leftovers ();
sub mtr_wait_blocking ($);
sub mtr_record_dead_children ();
sub mtr_ndbmgm_start($$);
sub mtr_mysqladmin_start($$$);
sub mtr_exit ($);
sub sleep_until_file_created ($$$);
sub mtr_kill_processes ($);
sub mtr_ping_with_timeout($);
sub mtr_ping_port ($);

# static in C
sub spawn_impl ($$$$$$$$);
unknown's avatar
unknown committed
29 30 31 32 33 34 35 36 37

##############################################################################
#
#  Execute an external command
#
##############################################################################

# This function tries to mimic the C version used in "netware/mysql_test_run.c"

38
sub mtr_run ($$$$$$;$) {
  # Execute $path with the arguments in @$arg_list_t through
  # spawn_impl() in 'run' mode. $input, $output and $error name the
  # files to use for STDIN, STDOUT and STDERR. $pid_file and the
  # optional hash reference $spawn_opts are handed on unchanged.
  my ($path, $arg_list_t, $input, $output, $error, $pid_file, $spawn_opts)= @_;

  return spawn_impl($path, $arg_list_t, 'run',
                    $input, $output, $error,
                    $pid_file, $spawn_opts);
}

51
sub mtr_run_test ($$$$$$;$) {
  # Execute $path with the arguments in @$arg_list_t through
  # spawn_impl() in 'test' mode. $input, $output and $error name the
  # files to use for STDIN, STDOUT and STDERR. $pid_file and the
  # optional hash reference $spawn_opts are handed on unchanged.
  my ($path, $arg_list_t, $input, $output, $error, $pid_file, $spawn_opts)= @_;

  return spawn_impl($path, $arg_list_t, 'test',
                    $input, $output, $error,
                    $pid_file, $spawn_opts);
}

64
sub mtr_spawn ($$$$$$;$) {
  # Execute $path with the arguments in @$arg_list_t through
  # spawn_impl() in 'spawn' mode. $input, $output and $error name the
  # files to use for STDIN, STDOUT and STDERR. $pid_file and the
  # optional hash reference $spawn_opts are handed on unchanged.
  my ($path, $arg_list_t, $input, $output, $error, $pid_file, $spawn_opts)= @_;

  return spawn_impl($path, $arg_list_t, 'spawn',
                    $input, $output, $error,
                    $pid_file, $spawn_opts);
}


##############################################################################
#
#  In 'run' and 'test' mode we return the exit code, else we return the PID
#
##############################################################################

84
sub spawn_impl ($$$$$$$$) {
  # Fork a child that redirects STDIN/STDOUT/STDERR as requested and
  # exec's $path with the arguments in @$arg_list_t. The parent hands
  # over to spawn_parent_impl() and returns whatever that returns.
  #
  #   $path        program to execute, must be defined
  #   $arg_list_t  reference to the argument list
  #   $mode        'run', 'test' or 'spawn', passed to spawn_parent_impl()
  #   $input       file to read STDIN from, if set
  #   $output      file to send STDOUT to, if set
  #   $error       file to send STDERR to, if set; when equal to $output
  #                STDERR is dup'ed from STDOUT
  #   $pid_file    not used here
  #   $spawn_opts  optional hash reference; 'append_log_file' makes the
  #                output files be opened for append instead of truncated
  my $path=       shift;
  my $arg_list_t= shift;
  my $mode=       shift;
  my $input=      shift;
  my $output=     shift;
  my $error=      shift;
  my $pid_file=   shift;                 # FIXME
  my $spawn_opts= shift;

  if ( $::opt_script_debug )
  {
    print STDERR "\n";
    print STDERR "#### ", "-" x 78, "\n";
    print STDERR "#### ", "STDIN  $input\n" if $input;
    print STDERR "#### ", "STDOUT $output\n" if $output;
    print STDERR "#### ", "STDERR $error\n" if $error;
    print STDERR "#### ", "$mode : $path ", join(" ",@$arg_list_t), "\n";
    print STDERR "#### ", "spawn options:\n";
    if ($spawn_opts)
    {
      foreach my $key (sort keys %{$spawn_opts})
      {
        print STDERR "#### ", "  - $key: $spawn_opts->{$key}\n";
      }
    }
    else
    {
      print STDERR "#### ", "  none\n";
    }
    print STDERR "#### ", "-" x 78, "\n";
  }

  mtr_error("Can't spawn with empty \"path\"") unless defined $path;


 FORK:
  {
    my $pid= fork();

    if ( ! defined $pid )
    {
      # %! is only documented to be true/false for a given errno name,
      # so test it directly instead of comparing it with $!.
      # See "perldoc Errno"
      if ( $!{EAGAIN} )
      {
        mtr_debug("Got EAGAIN from fork(), sleep 1 second and redo");
        sleep(1);
        redo FORK;
      }
      else
      {
        # $pid is undefined here, report the reason instead
        mtr_error("$path can't be forked: $!");
      }
    }

    if ( $pid )
    {
      # Parent. Return explicitly, the value of a sub that ends in a
      # bare block is not something to rely on.
      return spawn_parent_impl($pid,$mode,$path);
    }
    else
    {
      # Child, redirect output and exec
      # FIXME I tried POSIX::setsid() here to detach and, I hoped,
      # avoid zombies. But everything went wild, somehow the parent
      # became a deamon as well, and was hard to kill ;-)
      # Need to catch SIGCHLD and do waitpid or something instead......

      $SIG{INT}= 'DEFAULT';         # Parent do some stuff, we don't

      my $log_file_open_mode = '>';

      if ($spawn_opts and $spawn_opts->{'append_log_file'})
      {
        $log_file_open_mode = '>>';
      }

      if ( $output )
      {
        if ( $::glob_win32_perl )
        {
          # Don't redirect stdout on ActiveState perl since this is
          # just another thread in the same process.
          # Should be fixed so that the thread that is created with fork
          # executes the exe in another process and wait's for it to return.
          # In the meanwhile, we get all the output from mysqld's to screen
        }
        elsif ( ! open(STDOUT,$log_file_open_mode,$output) )
        {
          mtr_child_error("can't redirect STDOUT to \"$output\": $!");
        }
      }

      if ( $error )
      {
        if ( $output eq $error )
        {
          if ( ! open(STDERR,">&STDOUT") )
          {
            mtr_child_error("can't dup STDOUT: $!");
          }
        }
        else
        {
          if ( ! open(STDERR,$log_file_open_mode,$error) )
          {
            mtr_child_error("can't redirect STDERR to \"$error\": $!");
          }
        }
      }

      if ( $input )
      {
        if ( ! open(STDIN,"<",$input) )
        {
          mtr_child_error("can't redirect STDIN to \"$input\": $!");
        }
      }

      # exec() only returns if it failed
      if ( ! exec($path,@$arg_list_t) )
      {
        mtr_child_error("failed to execute \"$path\": $!");
      }
    }
  }
}


sub spawn_parent_impl {
  # Parent side of spawn_impl().
  #
  #   $pid   process id of the forked child
  #   $mode  'run' or 'test' to wait for the child, anything else to
  #          return at once
  #   $path  program name, only used in error messages
  #
  # Returns the exit status (as calculated by mtr_process_exit_status())
  # in 'run' and 'test' mode, otherwise the PID of the child.
  my ($pid, $mode, $path)= @_;

  # We spawned a process we don't wait for
  return $pid unless ( $mode eq 'run' or $mode eq 'test' );

  if ( $mode eq 'run' )
  {
    # Simple run of command, we wait for it to return
    my $waited_pid= waitpid($pid,0);
    mtr_error("$path ($pid) got lost somehow") if $waited_pid != $pid;

    return mtr_process_exit_status($?);
  }

  # 'test' mode: we run mysqltest and wait for it to return, but any
  # other child that terminates in the meantime is seen here as well,
  # since we wait for "any child" with waitpid(-1,0).
  #
  # FIXME is this as it should be? Can't mysqld terminate
  # normally from running a test case?

  my $exit_value= -1;
  my $saved_exit_value;
  my $ret_pid;                          # What waitpid() returns

  while ( ($ret_pid= waitpid(-1,0)) != -1 )
  {
    # Someone terminated, first find out if it was one of the timers
    my $timer_name= mtr_timer_timeout($::glob_timers, $ret_pid) || '';

    if ( $timer_name eq "suite" )
    {
      # We give up here
      # FIXME we should only give up the suite, not all of the run?
      print STDERR "\n";
      mtr_error("Test suite timeout");
    }
    elsif ( $timer_name eq "testcase" )
    {
      $saved_exit_value=  63;           # Mark as timeout
      kill(9, $pid);                    # Kill mysqltest
      next;                             # Go on and catch the termination
    }

    if ( $ret_pid == $pid )
    {
      # We got termination of mysqltest, we are done
      $exit_value= mtr_process_exit_status($?);
      last;
    }

    # One of the other child processes died, unless this was expected
    # mysqltest should be killed and test aborted
    check_expected_crash_and_restart($ret_pid);
  }

  if ( $ret_pid != $pid )
  {
    # The loop ended without seeing mysqltest terminate, kill it
    # and collect it.
    mtr_verbose("Kill mysqltest because another process died");
    kill(9,$pid);

    $ret_pid= waitpid($pid,0);
    mtr_error("$path ($pid) got lost somehow") if $ret_pid != $pid;
  }

  # A recorded timeout takes precedence over the real exit value
  return $saved_exit_value || $exit_value;
}

unknown's avatar
unknown committed
306

unknown's avatar
unknown committed
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
# ----------------------------------------------------------------------
# We try to emulate how an Unix shell calculates the exit code
# ----------------------------------------------------------------------

sub mtr_process_exit_status {
  # Convert a raw wait status (as found in $?) to the exit code an
  # Unix shell would report.
  my ($wait_status)= @_;

  # The low seven bits hold the number of the signal that ended the
  # process, if any. This is reported as signal number + 128.
  my $signal= $wait_status & 127;
  return $signal + 128 if $signal;

  # No signal, the exit code is what is above the low byte
  return $wait_status >> 8;
}

unknown's avatar
unknown committed
324

unknown's avatar
unknown committed
325 326 327 328 329 330
##############################################################################
#
#  Kill processes left from previous runs
#
##############################################################################

unknown's avatar
unknown committed
331

unknown's avatar
unknown committed
332 333 334 335
# Kill all processes(mysqld, ndbd, ndb_mgmd and im) that would conflict with
# this run
# Make sure to remove the PID file, if any.
# kill IM manager first, else it will restart the servers
unknown's avatar
unknown committed
336 337
sub mtr_kill_leftovers () {
  # Try to get rid of servers left running from an earlier run:
  #
  #  1. Start "mysqladmin shutdown" for every server in @$::master and
  #     @$::slave and, unless $::opt_skip_ndbcluster is set,
  #     "ndb_mgm shutdown" for every cluster in @$::clusters.
  #  2. Wait for those admin processes and for the ports to be free.
  #  3. Send SIGKILL to every process that has a *.pid file in
  #     "$::opt_vardir/run".
  #  4. Warn about every known port that still answers.
  #
  # The 'pid' of all servers, clusters and ndbds handled is reset to 0.

  mtr_report("Killing Possible Leftover Processes");
  mtr_debug("mtr_kill_leftovers(): started.");

  my @kill_pids;
  my %admin_pids;

  foreach my $srv (@{$::master}, @{$::slave})
  {
    mtr_debug("  - mysqld " .
              "(pid: $srv->{pid}; " .
              "pid file: '$srv->{path_pid}'; " .
              "socket: '$srv->{path_sock}'; ".
              "port: $srv->{port})");

    my $pid= mtr_mysqladmin_start($srv, "shutdown", 70);

    # Save the pid of the mysqladmin process
    $admin_pids{$pid}= 1;

    push(@kill_pids,{
                     pid      => $srv->{'pid'},
                     pidfile  => $srv->{'path_pid'},
                     sockfile => $srv->{'path_sock'},
                     port     => $srv->{'port'},
                    });
    $srv->{'pid'}= 0; # Assume we are done with it
  }

  if ( ! $::opt_skip_ndbcluster )
  {
    # Start shutdown of clusters.
    mtr_debug("Shutting down cluster...");

    foreach my $cluster (@{$::clusters})
    {
      mtr_debug("  - cluster " .
                "(pid: $cluster->{pid}; " .
                "pid file: '$cluster->{path_pid}')");

      my $pid= mtr_ndbmgm_start($cluster, "shutdown");

      # Save the pid of the ndb_mgm process
      $admin_pids{$pid}= 1;

      push(@kill_pids,{
                       pid      => $cluster->{'pid'},
                       pidfile  => $cluster->{'path_pid'}
                      });

      $cluster->{'pid'}= 0; # Assume we are done with it

      foreach my $ndbd (@{$cluster->{'ndbds'}})
      {
        mtr_debug("    - ndbd " .
                  "(pid: $ndbd->{pid}; " .
                  "pid file: '$ndbd->{path_pid}')");

        push(@kill_pids,{
                         pid      => $ndbd->{'pid'},
                         pidfile  => $ndbd->{'path_pid'},
                        });
        $ndbd->{'pid'}= 0; # Assume we are done with it
      }
    }
  }

  # Wait for all the admin processes to complete
  mtr_wait_blocking(\%admin_pids);

  # If we trusted "mysqladmin --shutdown_timeout= ..." we could just
  # terminate now, but we don't (FIXME should be debugged).
  # So we try again to ping and at least wait the same amount of time
  # mysqladmin would for all to die.

  mtr_ping_with_timeout(\@kill_pids);

  # We now have tried to terminate nice. We have waited for the listen
  # port to be free, but can't really tell if the mysqld process died
  # or not. We now try to find the process PID from the PID file, and
  # send a kill to that process. Note that Perl lets kill(0,@pids) be
  # a way to just return the number of processes the kernel can send
  # signals to. So this can be used (except on Cygwin) to determine
  # if there are processes left running that we found out might exist.
  #
  # But still after all this work, all we know is that we have
  # the ports free.

  # We scan the "var/run/" directory for other process id's to kill

  # FIXME $path_run_dir or something
  my $rundir= "$::opt_vardir/run";

  mtr_debug("Processing PID files in directory '$rundir'...");

  if ( -d $rundir )
  {
    # Lexical handle, so that we don't share a global RUNDIR with others
    opendir(my $rundir_dh, $rundir)
      or mtr_error("can't open directory \"$rundir\": $!");

    my @pids;

    # Test with defined(), a file could have a name that is false
    while ( defined(my $elem= readdir($rundir_dh)) )
    {
      # Only read pid from files that end with .pid
      if ( $elem =~ /.*[.]pid$/)
      {
        my $pidfile= "$rundir/$elem";

        if ( -f $pidfile )
        {
          mtr_debug("Processing PID file: '$pidfile'...");

          my $pid= mtr_get_pid_from_file($pidfile);

          mtr_debug("Got pid: $pid from file '$pidfile'");

          if ( $::glob_cygwin_perl or kill(0, $pid) )
          {
            mtr_debug("There is process with pid $pid -- scheduling for kill.");
            push(@pids, $pid);            # We know (cygwin guess) it exists
          }
          else
          {
            mtr_debug("There is no process with pid $pid -- skipping.");
          }
        }
      }
      else
      {
        mtr_warning("Found non pid file $elem in $rundir")
          if -f "$rundir/$elem";
      }
    }
    closedir($rundir_dh);

    if ( @pids )
    {
      mtr_debug("Killing the following processes with PID files: " .
                join(' ', @pids) . "...");

      start_reap_all();

      if ( $::glob_cygwin_perl )
      {
        # We have no (easy) way of knowing the Cygwin controlling
        # process, in the PID file we only have the Windows process id.
        # The list form of system() is used so that what we read from
        # the PID files never is interpreted by a shell.
        system("kill", "-f", @pids);    # Hope for the best....
        mtr_debug("Sleep 5 seconds waiting for processes to die");
        sleep(5);
      }
      else
      {
        # Kill, sleep a second and check, until none is left or we
        # have run out of retries
        my $retries= 10;
        do
        {
          mtr_debug("Sending SIGKILL to pids: " . join(' ', @pids));
          kill(9, @pids);
          mtr_report("Sleep 1 second waiting for processes to die");
          sleep(1);                     # Wait one second
        } while ( $retries-- and  kill(0, @pids) );

        if ( kill(0, @pids) )           # Check if some left
        {
          mtr_warning("can't kill process(es) " . join(" ", @pids));
        }
      }

      stop_reap_all();
    }
  }
  else
  {
    mtr_debug("Directory for PID files ($rundir) does not exist.");
  }

  # We may have failed everything, but we now check again if we have
  # the listen ports free to use, and if they are free, just go for it.

  mtr_debug("Checking known mysqld servers...");

  foreach my $srv ( @kill_pids )
  {
    if ( defined $srv->{'port'} and mtr_ping_port($srv->{'port'}) )
    {
      mtr_warning("can't kill old process holding port $srv->{'port'}");
    }
  }

  mtr_debug("mtr_kill_leftovers(): finished.");
}
unknown's avatar
unknown committed
529 530


unknown's avatar
unknown committed
531 532 533 534 535
# Check that all processes in list are killed
# The argument is a list of 'ports', 'pids', 'pidfiles' and 'socketfiles'
# for which shutdown has been started. Make sure they all get killed
# in one way or the other.
#
unknown's avatar
unknown committed
536
# FIXME On Cygwin, and maybe some other platforms, $srv->{'pid'} and
unknown's avatar
unknown committed
537
# the pid in $srv->{'pidfile'} will not be the same PID. We need to try to kill
unknown's avatar
unknown committed
538
# both I think.
unknown's avatar
unknown committed
539

unknown's avatar
unknown committed
540
sub mtr_check_stop_servers ($) {
  # Make sure the processes described by $spec are gone.
  #
  # $spec is a reference to a list of hashes with the keys 'pid',
  # 'pidfile', 'sockfile' and 'port' (the last two are optional).
  # The 'pid' of an entry is set to 0 when the process is found to be
  # gone. Returns at once if the list is empty.
  my $spec=  shift;

  # Return if no processes are defined
  return if ! @$spec;

  mtr_ping_with_timeout($spec);

  # ----------------------------------------------------------------------
  # We loop with waitpid() nonblocking to see how many of the ones we
  # are to kill, actually got killed by mysqladmin or ndb_mgm
  #
  # Note that we don't rely on this, the mysqld server might have stopped
  # listening to the port, but still be alive. But it is a start.
  # ----------------------------------------------------------------------

  foreach my $srv ( @$spec )
  {
    next unless $srv->{'pid'};

    my $ret_pid= waitpid($srv->{'pid'}, WNOHANG());
    if ( $ret_pid == $srv->{'pid'} )
    {
      mtr_verbose("Caught exit of process $ret_pid");
      $srv->{'pid'}= 0;
    }
  }

  # ----------------------------------------------------------------------
  # We know the process was started from this file, so there is a PID
  # saved, or else we have nothing to do.
  # Might be that it is recorded to be missing, but we failed to
  # take away the PID file earlier, then we do it now.
  # ----------------------------------------------------------------------

  my %mysqld_pids;

  foreach my $srv ( @$spec )
  {
    if ( $srv->{'pid'} )
    {
      $mysqld_pids{$srv->{'pid'}}= 1;
    }
    else
    {
      # Server is dead, we remove the pidfile if any
      # Race, could have been removed between I tested with -f
      # and the unlink() below, so I better check again with -f
      my $pidfile= $srv->{'pidfile'};

      # An entry without 'pidfile' has nothing to remove
      if ( defined $pidfile and -f $pidfile and ! unlink($pidfile) and
           -f $pidfile )
      {
        mtr_error("can't remove $pidfile");
      }
    }
  }

  # ----------------------------------------------------------------------
  # If all the processes in list already have been killed,
  # then we don't have to do anything.
  # ----------------------------------------------------------------------

  if ( ! keys %mysqld_pids )
  {
    return;
  }

  # ----------------------------------------------------------------------
  # In mtr_mysqladmin_shutdown() we only waited for the mysqld servers
  # not to listen to the port. But we are not sure we got them all
  # killed. If we suspect it lives, try nice kill with SIG_TERM. Note
  # that for true Win32 processes, kill(0,$pid) will not return 1.
  # ----------------------------------------------------------------------

  start_reap_all();                     # Avoid zombies

  my @mysqld_pids= keys %mysqld_pids;
  mtr_kill_processes(\@mysqld_pids);

  stop_reap_all();                      # Get into control again

  # ----------------------------------------------------------------------
  # Now, we check if all we can find using kill(0,$pid) are dead,
  # and just assume the rest are. We cleanup socket and PID files.
  # ----------------------------------------------------------------------

  {
    my $errors= 0;
    foreach my $srv ( @$spec )
    {
      if ( $srv->{'pid'} )
      {
        if ( kill(0,$srv->{'pid'}) )
        {
          # FIXME In Cygwin there seem to be some fast reuse
          # of PIDs, so dying may not be the right thing to do.
          $errors++;
          mtr_warning("can't kill process $srv->{'pid'}");
        }
        else
        {
          # We managed to kill it at last
          # FIXME In Cygwin, we will get here even if the process lives.

          # Not needed as we know the process is dead, but to be safe
          # we unlink and check success in two steps. We first unlink
          # without checking the error code, and then check if the
          # file still exists.

          foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'})
          {
            # Know it is dead so should be no race, careful anyway
            if ( defined $file and -f $file and ! unlink($file) and -f $file )
            {
              $errors++;
              mtr_warning("couldn't delete $file");
            }
          }
          $srv->{'pid'}= 0;
        }
      }
    }
    if ( $errors )
    {
      # There were errors killing processes
      # do one last attempt to ping the servers
      # and if they can't be pinged, assume they are dead
      if ( ! mtr_ping_with_timeout($spec) )
      {
        mtr_error("we could not kill or clean up all processes");
      }
      else
      {
        mtr_verbose("All ports were free, continuing");
      }
    }
  }

  # FIXME We just assume they are all dead, for Cygwin we are not
  # really sure

}

unknown's avatar
unknown committed
691 692 693
# Wait for all the processes in the list to terminate
sub mtr_wait_blocking($) {
  # $admin_pids is a reference to a hash whose keys are the PIDs of
  # the processes to wait for. Nothing is returned.
  my $admin_pids= shift;

  # Return if no processes defined
  return if ! %$admin_pids;

  mtr_verbose("mtr_wait_blocking");

  # Wait for all the started processes to exit
  # As mysqladmin is such a simple program, we trust it to terminate itself.
  # I.e. we wait blocking, and wait for them all before we go on.
  foreach my $pid (keys %{$admin_pids})
  {
    waitpid($pid,0);                    # Result is of no interest
  }

  return;
}

# Start "mysqladmin shutdown" for a specific mysqld
sub mtr_mysqladmin_start($$$) {
  # Spawn $::exe_mysqladmin with $command for the server described by
  # the hash reference $srv ('path_sock' and 'port' are used), using
  # $adm_shutdown_tmo as value for --shutdown_timeout. Output is
  # appended to "$::opt_vardir/log/mysqladmin.log".
  # Returns what mtr_spawn() returned, the pid of mysqladmin.
  my ($srv, $command, $adm_shutdown_tmo)= @_;

  my $args;
  mtr_init_args(\$args);

  mtr_add_arg($args, "--no-defaults");
  mtr_add_arg($args, "--user=%s", $::opt_user);
  mtr_add_arg($args, "--password=");
  mtr_add_arg($args, "--silent");

  # Connect by socket if it exists, and by port if we have one
  mtr_add_arg($args, "--socket=%s", $srv->{'path_sock'})
    if -e $srv->{'path_sock'};
  mtr_add_arg($args, "--port=%s", $srv->{'port'})
    if $srv->{'port'};
  mtr_add_arg($args, "--protocol=tcp")  # Needed if no --socket
    if $srv->{'port'} and ! -e $srv->{'path_sock'};

  mtr_add_arg($args, "--connect_timeout=5");

  # Shutdown time must be high as slave may be in reconnect
  mtr_add_arg($args, "--shutdown_timeout=$adm_shutdown_tmo");
  mtr_add_arg($args, "$command");

  my $admin_log= "$::opt_vardir/log/mysqladmin.log";
  my $pid= mtr_spawn($::exe_mysqladmin, $args,
                     "", $admin_log, $admin_log, "",
                     { append_log_file => 1 });
  mtr_verbose("mtr_mysqladmin_start, pid: $pid");

  return $pid;
}

# Run the ndb_mgm client with "-e <command>" against one cluster.
# Used to send e.g. "shutdown", which stops all data nodes of the
# cluster while leaving its ndb_mgmd running.
#
#   $cluster - hash ref describing the cluster; 'connect_string' is read
#   $command - the ndb_mgm command to execute
#
# All output of the client is sent to /dev/null.
# Returns the value of mtr_spawn(), which is logged here as the pid.
sub mtr_ndbmgm_start($$) {
  my ($cluster, $command)= @_;

  my $args;
  mtr_init_args(\$args);

  # Fixed client options first, then the connect string and the command
  foreach my $option ("--no-defaults", "--core", "--try-reconnect=1")
  {
    mtr_add_arg($args, $option);
  }
  mtr_add_arg($args, "--ndb_connectstring=%s", $cluster->{'connect_string'});
  mtr_add_arg($args, "-e");
  mtr_add_arg($args, "$command");

  my $devnull= "/dev/null";
  my $pid= mtr_spawn($::exe_ndb_mgm, $args, "", $devnull, $devnull, "", {});
  mtr_verbose("mtr_ndbmgm_start, pid: $pid");
  return $pid;

}


# Ping all servers in list, return when none of them answers
# or when the timeout has passed.
#
#   $spec - array ref of server hash refs; 'pid' and 'port' are read.
#           Entries without a true 'pid' or a defined 'port' are skipped.
#
# Returns 1 if no server answered on its port, 0 if at least one
# server was still answering when the retries ran out.
sub mtr_ping_with_timeout($) {
  my $spec= shift;
  my $timeout= 200;                     # 20 seconds max (200 * 0.1 s sleep)
  my $res= 1;                           # If we just fall through, we are done
                                        # in the sense that the servers don't
                                        # listen to their ports any longer

  mtr_debug("Waiting for mysqld servers to stop...");

 TIME:
  while ( $timeout-- )
  {
    foreach my $srv ( @$spec )
    {
      $res= 1;                          # We are optimistic
      if ( $srv->{'pid'} and defined $srv->{'port'} )
      {
        if ( mtr_ping_port($srv->{'port'}) )
        {
          mtr_verbose("waiting for process $srv->{'pid'} to stop ".
                      "using port $srv->{'port'}");

          # Millisecond sleep emulated with select
          select(undef, undef, undef, (0.1));
          $res= 0;

          # Start over from the first server in the next round
          next TIME;
        }
        # Otherwise the process was not using the port
      }
    }
    last;                               # If we got here, we are done
  }

  if ($res)
  {
    mtr_debug("mtr_ping_with_timeout(): All mysqld instances are down.");
  }
  else
  {
    mtr_report("mtr_ping_with_timeout(): At least one server is alive.");
  }

  return $res;
}


#
# Loop through our list of processes and look for an entry
# with the provided pid.
# Set the pid of that process to 0 if found.
#
# The lists searched are, in order: @$::master and @$::slave, then for
# each entry of @$::clusters the cluster itself (its ndb_mgmd) followed
# by its 'ndbds'. The search stops at the first match; a warning is
# issued when no entry has the pid.
#
#   $ret_pid - pid of the process that has exited
#
sub mark_process_dead($)
{
  my $ret_pid= shift;

  foreach my $mysqld (@{$::master}, @{$::slave})
  {
    if ( $mysqld->{'pid'} eq $ret_pid )
    {
      mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid");
      $mysqld->{'pid'}= 0;
      return;
    }
  }

  foreach my $cluster (@{$::clusters})
  {
    if ( $cluster->{'pid'} eq $ret_pid )
    {
      mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid");
      $cluster->{'pid'}= 0;
      return;
    }

    foreach my $ndbd (@{$cluster->{'ndbds'}})
    {
      if ( $ndbd->{'pid'} eq $ret_pid )
      {
        mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid");
        $ndbd->{'pid'}= 0;
        return;
      }
    }
  }
  mtr_warning("mark_process_dead couldn't find an entry for pid: $ret_pid");

}

#
# Loop through our list of processes and look for an entry
# with the provided pid. If found, mark it as dead (pid set to 0),
# then check for the file indicating an expected crash and, if that
# file exists, restart the process and remove the file.
#
# The expect files live in $::opt_vardir/tmp/:
#   mysqld:   <type><idx>.expect
#   ndb_mgmd: ndb_mgmd_<cluster type>.expect
#   ndbd:     ndbd_<cluster type><ndbd idx>.expect
#
# A warning is issued when no entry has the pid.
#
#   $ret_pid - pid of the process that has exited
#
sub check_expected_crash_and_restart($)
{
  my $ret_pid= shift;

  foreach my $mysqld (@{$::master}, @{$::slave})
  {
    if ( $mysqld->{'pid'} eq $ret_pid )
    {
      mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid");
      $mysqld->{'pid'}= 0;

      # Check if crash expected and restart if it was
      my $expect_file= "$::opt_vardir/tmp/" . "$mysqld->{'type'}" .
        "$mysqld->{'idx'}" . ".expect";
      if ( -f $expect_file )
      {
        mtr_verbose("Crash was expected, file $expect_file exists");
        mysqld_start($mysqld, $mysqld->{'start_opts'},
                     $mysqld->{'start_slave_master_info'});
        unlink($expect_file);
      }

      return;
    }
  }

  foreach my $cluster (@{$::clusters})
  {
    if ( $cluster->{'pid'} eq $ret_pid )
    {
      mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid");
      $cluster->{'pid'}= 0;

      # Check if crash expected and restart if it was
      my $expect_file= "$::opt_vardir/tmp/ndb_mgmd_" . "$cluster->{'type'}" .
        ".expect";
      if ( -f $expect_file )
      {
        mtr_verbose("Crash was expected, file $expect_file exists");
        ndbmgmd_start($cluster);
        unlink($expect_file);
      }
      return;
    }

    foreach my $ndbd (@{$cluster->{'ndbds'}})
    {
      if ( $ndbd->{'pid'} eq $ret_pid )
      {
        mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid");
        $ndbd->{'pid'}= 0;

        # Check if crash expected and restart if it was
        my $expect_file= "$::opt_vardir/tmp/ndbd_" . "$cluster->{'type'}" .
          "$ndbd->{'idx'}" . ".expect";
        if ( -f $expect_file )
        {
          mtr_verbose("Crash was expected, file $expect_file exists");
          ndbd_start($cluster, $ndbd->{'idx'},
                     $ndbd->{'start_extra_args'});
          unlink($expect_file);
        }
        return;
      }
    }
  }
  mtr_warning("check_expected_crash_and_restart couldn't find an entry for pid: $ret_pid");

}

##############################################################################
#
#  The operating system keeps information about dead children until
#  it has been read. We read this information here, and if our records
#  say the process is alive, we mark it as dead.
#
##############################################################################

# Reap every child that has already terminated, without blocking, and
# pass each reaped pid to mark_process_dead().
#
# Returns 1 if at least one child was reaped, otherwise 0.
sub mtr_record_dead_children () {

  my $process_died= 0;
  my $ret_pid;

  # Wait without blocking to see if any processes have died
  # -1 or 0 means there are no more processes to wait for
  while ( ($ret_pid= waitpid(-1, WNOHANG)) != 0 and $ret_pid != -1 )
  {
    mtr_warning("mtr_record_dead_children: $ret_pid");
    mark_process_dead($ret_pid);
    $process_died= 1;
  }
  return $process_died;
}

unknown's avatar
unknown committed
968
# Switch to automatic reaping of child processes by ignoring SIGCHLD,
# and reap any child that had already terminated before the switch.
# Each pid reaped here is passed to mark_process_dead().
sub start_reap_all {
  # This causes terminating processes to not become zombies, avoiding
  # the need for (or possibility of) explicit waitpid().
  $SIG{CHLD}= 'IGNORE';

  # On some platforms (Linux, QNX, OSX, ...) there is a potential race
  # here. If a process terminated before setting $SIG{CHLD} (but after
  # any attempt to waitpid() it), it will still be a zombie. So we
  # have to handle any such process here.
  my $pid;
  while ( ($pid= waitpid(-1, WNOHANG)) != 0 and $pid != -1 )
  {
    mtr_warning("start_reap_all pid: $pid");
    mark_process_dead($pid);
  }
}

# Counterpart of start_reap_all(): put the SIGCHLD disposition back to
# the default.
sub stop_reap_all {
  $SIG{'CHLD'}= 'DEFAULT';
}
unknown's avatar
unknown committed
988

unknown's avatar
unknown committed
989 990

# Check whether something accepts TCP connections on the given port
# of "localhost".
#
#   $port - TCP port number to probe
#
# Returns 1 if the connect succeeded (port is in use), otherwise 0.
# Calls mtr_error() if "localhost" can't be resolved or the socket
# can't be created.
sub mtr_ping_port ($) {
  my $port= shift;

  mtr_verbose("mtr_ping_port: $port");

  my $remote= "localhost";
  my $iaddr=  inet_aton($remote);
  if ( ! $iaddr )
  {
    mtr_error("can't find IP number for $remote");
  }
  my $paddr=  sockaddr_in($port, $iaddr);
  my $proto=  getprotobyname('tcp');

  # Lexical handle instead of a global bareword, so nothing outside
  # this sub can be affected by it
  my $sock;
  if ( ! socket($sock, PF_INET, SOCK_STREAM, $proto) )
  {
    mtr_error("can't create socket: $!");
  }

  mtr_debug("Pinging server (port: $port)...");

  my $connected= connect($sock, $paddr);

  # Release the socket whether or not the connect worked
  close($sock);                         # FIXME check error?

  if ( $connected )
  {
    mtr_verbose("USED");
    return 1;
  }
  else
  {
    mtr_verbose("FREE");
    return 0;
  }
}

unknown's avatar
unknown committed
1023 1024 1025 1026 1027 1028
##############################################################################
#
#  Wait for a file to be created
#
##############################################################################

# FIXME check that the pidfile contains the expected pid!

# Poll every 100 milliseconds until the file is readable, the process
# has died, or the timeout has passed.
#
#   $pidfile - path of the file to wait for
#   $timeout - maximum time to wait, in seconds
#   $pid     - pid of the process expected to create the file; when 0,
#              no check for a dead process is made
#
# Returns $pid as soon as the file is readable. Returns 0 if the
# process was found dead or the timeout passed.
sub sleep_until_file_created ($$$) {
  my $pidfile= shift;
  my $timeout= shift;
  my $pid=     shift;
  my $sleeptime= 100; # Milliseconds
  my $loops= ($timeout * 1000) / $sleeptime;

  for ( my $loop= 1; $loop <= $loops; $loop++ )
  {
    if ( -r $pidfile )
    {
      return $pid;
    }

    # Check if it died after the fork() was successful
    if ( $pid != 0 && waitpid($pid, WNOHANG) == $pid )
    {
      mtr_warning("Process $pid died");
      return 0;
    }

    mtr_debug("Sleep $sleeptime milliseconds waiting for $pidfile");

    # Print extra message every 60 seconds. Test the elapsed time in
    # milliseconds so that the message is printed once per minute, not
    # on every iteration that falls within that second.
    my $elapsed_ms= $loop * $sleeptime;
    if ( $elapsed_ms % 60_000 == 0 )
    {
      my $seconds= $elapsed_ms / 1000;
      my $left= $timeout - $seconds;
      mtr_warning("Waited $seconds seconds for $pidfile to be created, " .
                  "still waiting for $left seconds...");
    }

    # Millisecond sleep emulated with select
    select(undef, undef, undef, ($sleeptime/1000));
  }

  return 0;
}


1071 1072 1073
# Kill each of the given processes, first with signal 15 and then, if
# mtr_im_kill_process() did not return true for it, with signal 9.
#
#   $pids - array ref of pids to kill
#
sub mtr_kill_processes ($) {
  my $pids = shift;

  mtr_verbose("mtr_kill_processes " . join(" ", @$pids));

  foreach my $pid (@$pids)
  {
    foreach my $sig (15, 9)
    {
      # NOTE(review): the meaning of the trailing arguments (10, 1) is
      # defined by mtr_im_kill_process(), which is not in this section
      last if mtr_im_kill_process([ $pid ], $sig, 10, 1);
    }
  }
}

unknown's avatar
unknown committed
1085

unknown's avatar
unknown committed
1086 1087 1088 1089 1090 1091
##############################################################################
#
#  When we exit, we kill off all children
#
##############################################################################

# FIXME something is wrong, we sometimes terminate with "Hangup" written
# to tty, and no STDERR output telling us why.

# FIXME for some reason, setting HUP to 'IGNORE' will cause exit() to
# write out "Hangup", and maybe lose some output. We insert a sleep...

# Stop all timers, send HUP to our own process group (only when we
# are its leader and not running under win32 perl) and exit.
#
#   $code - exit code passed to exit()
#
# Does not return.
sub mtr_exit ($) {
  my $code= shift;
  mtr_timer_stop_all($::glob_timers);

  # Ignore the HUP we are about to send to our own process group
  local $SIG{HUP} = 'IGNORE';
  # ToDo: Signalling -$$ will only work if we are the process group
  # leader (in fact on QNX it will signal our session group leader,
  # which might be Do-compile or Pushbuild, causing tests to be
  # aborted). So we only do it if we are the group leader. We might
  # set ourselves as the group leader at startup (with
  # POSIX::setpgrp(0,0)), but then care must be needed to always do
  # proper child process cleanup.
  kill('HUP', -$$) if !$::glob_win32_perl and $$ == getpgrp();

  exit($code);
}

unknown's avatar
unknown committed
1114 1115
###########################################################################

unknown's avatar
unknown committed
1116
1;