Commit 1e832e36 authored by Geoff Simmons

The monitor starts the Varnish child process if it is stopped.

It should be configurable whether this happens -- an admin may want
to intentionally stop Varnish, and this feature would interfere with
that.
parent a00050a4
......@@ -54,6 +54,8 @@ type instanceMetrics struct {
pings prometheus.Counter
pingFails prometheus.Counter
panics prometheus.Counter
restarts prometheus.Counter
restartFails prometheus.Counter
childRunning prometheus.Counter
childNotRunning prometheus.Counter
vclDiscards prometheus.Counter
......@@ -163,6 +165,18 @@ var (
Name: "panics_total",
Help: "Total number of panics detected",
}),
restarts: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "restarts_total",
Help: "Total number of child process restarts",
}),
restartFails: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "restart_fails_total",
Help: "Total number of child process restart failures",
}),
childRunning: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
......@@ -219,6 +233,8 @@ func initMetrics() {
prometheus.Register(metrics.pings)
prometheus.Register(metrics.pingFails)
prometheus.Register(metrics.panics)
prometheus.Register(metrics.restarts)
prometheus.Register(metrics.restartFails)
prometheus.Register(metrics.childRunning)
prometheus.Register(metrics.childNotRunning)
prometheus.Register(metrics.vclDiscards)
......
......@@ -41,6 +41,8 @@ const (
pingErr = "PingFailure"
statusErr = "StatusFailure"
statusNotRun = "StatusNotRunning"
restartErr = "RestartFailure"
restart = "Restart"
panicErr = "PanicFailure"
panic = "Panic"
vclListErr = "VCLListFailure"
......@@ -118,6 +120,19 @@ func (vc *Controller) checkInst() bool {
} else {
metrics.childNotRunning.Inc()
vc.warnEvt(statusNotRun, "Status: %s", state)
// XXX make auto-start configurable: an admin may have stopped the child intentionally
if state == admin.Stopped {
vc.log.Debug("Attempting restart")
if err := adm.Start(); err != nil {
vc.warnEvt(restartErr, "Cannot restart: %v",
err)
metrics.restartFails.Inc()
return false
} else {
vc.infoEvt(restart, "Child restarted")
metrics.restarts.Inc()
}
}
}
panic, err := adm.GetPanic()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment