Commit 3a7824d1 authored by Tollef Fog Heen

Merge r5116: Kill child process if CLI gets out of sync

Whenever the CLI comms with the child process run off the rails, we
have to kill the child: We cannot get the CLI pipes back in sync,
even if the child process was just preoccupied with something for
much longer than cli_timeout.

Fixes: #744



git-svn-id: http://www.varnish-cache.org/svn/branches/2.1@5327 d4fa192b-c00b-0410-8231-f00ffab90ce4
parent 0c0ced95
...@@ -47,6 +47,7 @@ extern pid_t child_pid; ...@@ -47,6 +47,7 @@ extern pid_t child_pid;
void MGT_Run(void); void MGT_Run(void);
void mgt_stop_child(void); void mgt_stop_child(void);
void mgt_got_fd(int fd); void mgt_got_fd(int fd);
void MGT_Child_Cli_Fail(void);
/* mgt_cli.c */ /* mgt_cli.c */
......
...@@ -200,14 +200,33 @@ child_poker(const struct vev *e, int what) ...@@ -200,14 +200,33 @@ child_poker(const struct vev *e, int what)
return (0); return (0);
if (!mgt_cli_askchild(NULL, NULL, "ping\n")) if (!mgt_cli_askchild(NULL, NULL, "ping\n"))
return (0); return (0);
REPORT(LOG_ERR, return (0);
"Child (%jd) not responding to ping, killing it.", }
/*--------------------------------------------------------------------
* If CLI communications with the child process fail, there is nothing
* for us to do but to drag it behind the barn and get it over with.
*
* The typical case is where the child process fails to return a reply
* before the cli_timeout expires. This invalidates the CLI pipes for
* all future use, as we don't know if the child was just slow and the
* result gets piped later on, or if the child is catatonic.
*/
void
MGT_Child_Cli_Fail(void)
{
if (child_state != CH_RUNNING)
return;
if (child_pid < 0)
return;
REPORT(LOG_ERR, "Child (%jd) not responding to CLI, killing it.",
(intmax_t)child_pid); (intmax_t)child_pid);
if (params->diag_bitmap & 0x1000) if (params->diag_bitmap & 0x1000)
(void)kill(child_pid, SIGKILL); (void)kill(child_pid, SIGKILL);
else else
(void)kill(child_pid, SIGQUIT); (void)kill(child_pid, SIGQUIT);
return (0);
} }
/*--------------------------------------------------------------------*/ /*--------------------------------------------------------------------*/
......
...@@ -178,16 +178,16 @@ mcf_askchild(struct cli *cli, const char * const *av, void *priv) ...@@ -178,16 +178,16 @@ mcf_askchild(struct cli *cli, const char * const *av, void *priv)
if (i != strlen(cli->cmd)) { if (i != strlen(cli->cmd)) {
cli_result(cli, CLIS_COMMS); cli_result(cli, CLIS_COMMS);
cli_out(cli, "CLI communication error"); cli_out(cli, "CLI communication error");
MGT_Child_Cli_Fail();
return; return;
} }
i = write(cli_o, "\n", 1); i = write(cli_o, "\n", 1);
if (i != 1) { if (i != 1) {
cli_result(cli, CLIS_COMMS); cli_result(cli, CLIS_COMMS);
cli_out(cli, "CLI communication error"); cli_out(cli, "CLI communication error");
MGT_Child_Cli_Fail();
return; return;
} }
assert(i == 1 || errno == EPIPE);
(void)cli_readres(cli_i, &u, &q, params->cli_timeout); (void)cli_readres(cli_i, &u, &q, params->cli_timeout);
cli_result(cli, u); cli_result(cli, u);
cli_out(cli, "%s", q); cli_out(cli, "%s", q);
...@@ -233,12 +233,15 @@ mgt_cli_askchild(unsigned *status, char **resp, const char *fmt, ...) { ...@@ -233,12 +233,15 @@ mgt_cli_askchild(unsigned *status, char **resp, const char *fmt, ...) {
*status = CLIS_COMMS; *status = CLIS_COMMS;
if (resp != NULL) if (resp != NULL)
*resp = strdup("CLI communication error"); *resp = strdup("CLI communication error");
MGT_Child_Cli_Fail();
return (CLIS_COMMS); return (CLIS_COMMS);
} }
(void)cli_readres(cli_i, &u, resp, params->cli_timeout); (void)cli_readres(cli_i, &u, resp, params->cli_timeout);
if (status != NULL) if (status != NULL)
*status = u; *status = u;
if (u == CLIS_COMMS)
MGT_Child_Cli_Fail();
return (u == CLIS_OK ? 0 : u); return (u == CLIS_OK ? 0 : u);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment