Unverified Commit ed6a1138 authored by Justin M. Keyes's avatar Justin M. Keyes Committed by GitHub

Merge #8273 'job-control: avoid kill-timer race'

parents 7598e6cf b2c06640
......@@ -5033,10 +5033,9 @@ jobstart({cmd}[, {opts}]) *jobstart()*
width : (pty only) Width of the terminal screen
height : (pty only) Height of the terminal screen
TERM : (pty only) $TERM environment variable
detach : (non-pty only) Detach the job process from the
nvim process. The process will not get killed
when nvim exits. If the process dies before
nvim exits, "on_exit" will still be invoked.
detach : (non-pty only) Detach the job process: it will
not be killed when Nvim exits. If the process
exits before Nvim, "on_exit" will be invoked.
{opts} is passed as |self| dictionary to the callback; the
caller may set other keys to pass application-specific data.
......
......@@ -658,9 +658,9 @@ static void channel_process_exit_cb(Process *proc, int status, void *data)
terminal_close(chan->term, msg);
}
// if status is -1 the process did not really exit,
// we just closed the handle onto a detached process
if (status >= 0) {
// If process did not exit, we only closed the handle of a detached process.
bool exited = (status >= 0);
if (exited) {
process_channel_event(chan, &chan->on_exit, "exit", NULL, 0, status);
}
......
......@@ -21,7 +21,6 @@ void loop_init(Loop *loop, void *data)
loop->recursive = 0;
loop->uv.data = loop;
loop->children = kl_init(WatcherPtr);
loop->children_stop_requests = 0;
loop->events = multiqueue_new_parent(loop_on_put, loop);
loop->fast_events = multiqueue_new_child(loop->events);
loop->thread_events = multiqueue_new_parent(NULL, NULL);
......
......@@ -37,7 +37,6 @@ typedef struct loop {
// generic timer, used by loop_poll_events()
uv_timer_t poll_timer;
size_t children_stop_requests;
uv_async_t async;
uv_mutex_t mutex;
int recursive;
......
......@@ -23,7 +23,7 @@
#endif
// Time for a process to exit cleanly before we send KILL.
// For pty processes SIGTERM is sent first (in case SIGHUP was not enough).
// For PTY processes SIGTERM is sent first (in case SIGHUP was not enough).
#define KILL_TIMEOUT_MS 2000
static bool process_is_tearing_down = false;
......@@ -111,6 +111,7 @@ int process_spawn(Process *proc, bool in, bool out, bool err)
proc->internal_close_cb = decref;
proc->refcount++;
kl_push(WatcherPtr, proc->loop->children, proc);
DLOG("new: pid=%d argv=[%s]", proc->pid, *proc->argv);
return 0;
}
......@@ -188,8 +189,7 @@ int process_wait(Process *proc, int ms, MultiQueue *events)
}
if (proc->refcount == 1) {
// Job exited, collect status and manually invoke close_cb to free the job
// resources
// Job exited, free its resources.
decref(proc);
if (events) {
// the decref call created an exit event, process it now
......@@ -205,11 +205,12 @@ int process_wait(Process *proc, int ms, MultiQueue *events)
/// Ask a process to terminate and eventually kill if it doesn't respond
void process_stop(Process *proc) FUNC_ATTR_NONNULL_ALL
{
if (proc->stopped_time) {
bool exited = (proc->status >= 0);
if (exited || proc->stopped_time) {
return;
}
proc->stopped_time = os_hrtime();
switch (proc->type) {
case kProcessTypeUv:
// Close the process's stdin. If the process doesn't close its own
......@@ -227,35 +228,32 @@ void process_stop(Process *proc) FUNC_ATTR_NONNULL_ALL
abort();
}
Loop *loop = proc->loop;
if (!loop->children_stop_requests++) {
// When there's at least one stop request pending, start a timer that
// will periodically check if a signal should be send to the job.
ILOG("starting job kill timer");
uv_timer_start(&loop->children_kill_timer, children_kill_cb,
KILL_TIMEOUT_MS, KILL_TIMEOUT_MS);
}
// (Re)start timer to verify that stopped process(es) died.
uv_timer_start(&proc->loop->children_kill_timer, children_kill_cb,
KILL_TIMEOUT_MS, 0);
}
/// Iterates the process list sending SIGTERM to stopped processes and SIGKILL
/// to those that didn't die from SIGTERM after a while(exit_timeout is 0).
/// Sends SIGKILL (or SIGTERM..SIGKILL for PTY jobs) to processes that did
/// not terminate after process_stop().
static void children_kill_cb(uv_timer_t *handle)
{
Loop *loop = handle->loop->data;
uint64_t now = os_hrtime();
kl_iter(WatcherPtr, loop->children, current) {
Process *proc = (*current)->data;
if (!proc->stopped_time) {
bool exited = (proc->status >= 0);
if (exited || !proc->stopped_time) {
continue;
}
uint64_t elapsed = (now - proc->stopped_time) / 1000000 + 1;
if (elapsed >= KILL_TIMEOUT_MS) {
int sig = proc->type == kProcessTypePty && elapsed < KILL_TIMEOUT_MS * 2
? SIGTERM
: SIGKILL;
os_proc_tree_kill(proc->pid, sig);
uint64_t term_sent = UINT64_MAX == proc->stopped_time;
if (kProcessTypePty != proc->type || term_sent) {
os_proc_tree_kill(proc->pid, SIGKILL);
} else {
os_proc_tree_kill(proc->pid, SIGTERM);
proc->stopped_time = UINT64_MAX; // Flag: SIGTERM was sent.
// Restart timer.
uv_timer_start(&proc->loop->children_kill_timer, children_kill_cb,
KILL_TIMEOUT_MS, 0);
}
}
}
......@@ -383,12 +381,8 @@ static void process_close_handles(void **argv)
static void on_process_exit(Process *proc)
{
Loop *loop = proc->loop;
if (proc->stopped_time && loop->children_stop_requests
&& !--loop->children_stop_requests) {
// Stop the timer if no more stop requests are pending
DLOG("Stopping process kill timer");
uv_timer_stop(&loop->children_kill_timer);
}
ILOG("exited: pid=%d status=%d stoptime=%" PRIu64, proc->pid, proc->status,
proc->stopped_time);
// Process has terminated, but there could still be data to be read from the
// OS. We are still in the libuv loop, so we cannot call code that polls for
......
......@@ -19,8 +19,7 @@ struct process {
Loop *loop;
void *data;
int pid, status, refcount;
// set to the hrtime of when process_stop was called for the process.
uint64_t stopped_time;
uint64_t stopped_time; // process_stop() timestamp
const char *cwd;
char **argv;
Stream in, out, err;
......
......@@ -32,7 +32,7 @@ describe('TermClose event', function()
end)
it('kills job trapping SIGTERM', function()
if helpers.pending_win32(pending) then return end
if iswin() then return end
nvim('set_option', 'shell', 'sh')
nvim('set_option', 'shellcmdflag', '-c')
command([[ let g:test_job = jobstart('trap "" TERM && echo 1 && sleep 60', { ]]
......@@ -51,8 +51,8 @@ describe('TermClose event', function()
ok(duration <= 4000) -- Epsilon for slow CI
end)
it('kills pty job trapping SIGHUP and SIGTERM', function()
if helpers.pending_win32(pending) then return end
it('kills PTY job trapping SIGHUP and SIGTERM', function()
if iswin() then return end
nvim('set_option', 'shell', 'sh')
nvim('set_option', 'shellcmdflag', '-c')
command([[ let g:test_job = jobstart('trap "" HUP TERM && echo 1 && sleep 60', { ]]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment