Commit b7c7dc4e authored by Martin Pitt

* t/030_errors.t: Add various test cases for stopping stopped clusters with
  stale or corrupt PID files, with and without --force. This reproduces
  #473879.
* pg_ctlcluster: On stop, clean up stale/corrupt PID files if the server is
  not running. Also fix a taint error and simplify the code a bit.
  (Closes: #473879)
parent ac8b91d4
postgresql-common (88) UNRELEASED; urgency=low

  * t/030_errors.t: Add various test cases for stopping stopped clusters with
    stale or corrupt PID files, with and without --force. This reproduces
    #473879.
  * pg_ctlcluster: On stop, clean up stale/corrupt PID files if the server is
    not running. Also fix a taint error and simplify the code a bit.
    (Closes: #473879)

 -- Martin Pitt <mpitt@debian.org>  Tue, 22 Apr 2008 23:13:51 +0200

postgresql-common (87) unstable; urgency=medium
* Urgency medium since #472930 is an important bug fix.
......
......@@ -35,20 +35,20 @@ sub get_running_pid {
if (open PIDFILE, $_[0]) {
my $pid = <PIDFILE>;
chomp $pid if defined $pid;
close PIDFILE;
chomp $pid;
($pid) = $pid =~ /^(\d+)\s*$/; # untaint
return $pid;
} else {
return undef;
}
}
# Check whether a pid file is present and belongs to a running postmaster.
# Arguments: <pid file path>
sub check_running_postmaster {
my $pid = get_running_pid $_[0];
if (defined $pid and $pid =~ /^\d+$/) {
if (defined $pid) {
if (open PS, '-|', '/bin/ps', '-o', 'comm', 'h', 'p', $pid) {
my $process = <PS>;
chomp $process if defined $process;
......@@ -80,14 +80,20 @@ sub start_check_pid_file {
}
}
# Check whether the cluster's pid file is missing, corrupt, or stale before a
# stop. If the pid file belongs to a running postmaster, simply return so that
# the caller can go on stopping the server. Otherwise clean up or abort:
#  - if %info says the cluster is running but the pid file is unusable, report
#    an error (error() is defined elsewhere; presumably it terminates);
#  - else remove a leftover pid file, report that the cluster is not running,
#    and exit with status 2.
# Reads the global %info ('pgdata', 'running').
sub stop_check_pid_file {
    my $pidfile = $info{'pgdata'}.'/postmaster.pid';
    my $pid = get_running_pid($pidfile);

    # A well-formed PID alone is not enough: a stale pid file also contains a
    # number. Only skip the cleanup if a postmaster is really running.
    return if (defined $pid && check_running_postmaster($pidfile));

    if ($info{'running'}) {
        error('pid file is invalid, please manually kill the stale server process.');
    }

    # Remove invalid or stale PID file; only claim success if it is gone.
    if (-e $pidfile) {
        if (unlink $pidfile) {
            print "Removed stale pid file.\n";
        } else {
            warn "Could not remove stale pid file $pidfile: $!\n";
        }
    }

    print "Cluster is not running.\n";
    exit 2;
}
......@@ -374,10 +380,7 @@ sub stop {
# if that still not helps, use the big hammer
if (-f $info{'pgdata'}.'/postmaster.pid') {
print "(does not shutdown, killing the process)";
if (open FPID, $info{'pgdata'}.'/postmaster.pid') {
$pid = <FPID>;
close FPID;
}
$pid = get_running_pid $info{'pgdata'}.'/postmaster.pid';
kill (9, $pid) if $pid;
unlink $info{'pgdata'}.'/postmaster.pid';
}
......
......@@ -6,7 +6,7 @@ require File::Temp;
use lib 't';
use TestLib;
use Test::More tests => 134;
use Test::More tests => 146;
use lib '/usr/share/postgresql-common';
use PgCommon;
......@@ -16,12 +16,15 @@ my $version = $MAJORS[-1];
my $socketdir = '/tmp/postgresql-testsuite/';
my ($pg_uid, $pg_gid) = (getpwnam 'postgres')[2,3];
# Create the main cluster's postmaster.pid with the given content, make it
# owned by the postgres user with mode 0700, and return its path.
# Arguments: <pid file content>
# Uses the file-level $version, $pg_uid and $pg_gid; dies on any I/O failure.
sub create_pidfile {
    my ($content) = @_;
    my $fname = "/var/lib/postgresql/$version/main/postmaster.pid";

    # Three-argument open with a lexical handle: no mode injection through the
    # file name and no global bareword handle left behind.
    open my $fh, '>', $fname or die "open: $!";
    print {$fh} $content;
    # Buffered write errors only surface at close, so check it.
    close $fh or die "close: $!";

    chown $pg_uid, $pg_gid, $fname or die "chown: $!";
    chmod 0700, $fname or die "chmod: $!";
    return $fname;
}

# Backwards-compatible wrapper for existing callers: create a corrupt pid file
# containing 'foo' and return its path.
sub create_foo_pid {
    return create_pidfile('foo');
}
sub check_nonexisting_cluster_error {
......@@ -93,7 +96,7 @@ ok_dir $socketdir, [], "No sockets in $socketdir";
# server should not stop with corrupt file
rename "/var/lib/postgresql/$version/main/postmaster.pid",
"/var/lib/postgresql/$version/main/postmaster.pid.orig" or die "rename: $!";
create_foo_pid;
create_pidfile 'foo';
is_program_out 'postgres', "pg_ctlcluster $version main stop", 1,
"Error: pid file is invalid, please manually kill the stale server process.\n",
'pg_ctlcluster fails with corrupted PID file';
......@@ -120,8 +123,33 @@ is ((exec_as 'postgres', "pg_ctlcluster $version main stop"), 0,
like_program_out 'postgres', 'pg_lsclusters -h', 0, qr/down/, 'cluster is down';
ok (! -e "/var/lib/postgresql/$version/main/postmaster.pid", 'no pid file left');
# trying to stop a stopped server cleans up corrupt and stale pid files
my $pf = create_pidfile 'foo';
is_program_out 'postgres', "pg_ctlcluster $version main stop", 2,
"Removed stale pid file.\nCluster is not running.\n",
'pg_ctlcluster stop succeeds with corrupted PID file';
ok (! -e $pf, 'pid file was cleaned up');
create_pidfile 'foo';
is_program_out 'postgres', "pg_ctlcluster --force $version main stop", 2,
"Removed stale pid file.\nCluster is not running.\n",
'pg_ctlcluster --force stop succeeds with corrupted PID file';
ok (! -e $pf, 'pid file was cleaned up');
create_pidfile '99998';
is_program_out 'postgres', "pg_ctlcluster $version main stop", 2,
"Removed stale pid file.\nCluster is not running.\n",
'pg_ctlcluster stop succeeds with stale PID file';
ok (! -e $pf, 'pid file was cleaned up');
create_pidfile '99998';
is_program_out 'postgres', "pg_ctlcluster --force $version main stop", 2,
"Removed stale pid file.\nCluster is not running.\n",
'pg_ctlcluster --force stop succeeds with stale PID file';
ok (! -e $pf, 'pid file was cleaned up');
# corrupt PID file while server is down
create_foo_pid;
create_pidfile 'foo';
is_program_out 'postgres', "pg_ctlcluster $version main start", 0,
"Removed stale pid file.\n", 'pg_ctlcluster succeeds with corrupted PID file';
like_program_out 'postgres', 'pg_lsclusters -h', 0, qr/online/, 'cluster is online';
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment