Commit 1e832e36 authored by Geoff Simmons

The monitor starts the Varnish child process if it is stopped.

It should be configurable whether this happens -- an admin may want
to intentionally stop Varnish, and this feature would interfere with
that.
parent a00050a4
...@@ -54,6 +54,8 @@ type instanceMetrics struct { ...@@ -54,6 +54,8 @@ type instanceMetrics struct {
pings prometheus.Counter pings prometheus.Counter
pingFails prometheus.Counter pingFails prometheus.Counter
panics prometheus.Counter panics prometheus.Counter
restarts prometheus.Counter
restartFails prometheus.Counter
childRunning prometheus.Counter childRunning prometheus.Counter
childNotRunning prometheus.Counter childNotRunning prometheus.Counter
vclDiscards prometheus.Counter vclDiscards prometheus.Counter
...@@ -163,6 +165,18 @@ var ( ...@@ -163,6 +165,18 @@ var (
Name: "panics_total", Name: "panics_total",
Help: "Total number of panics detected", Help: "Total number of panics detected",
}), }),
restarts: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "restarts_total",
Help: "Total number of child process restarts",
}),
restartFails: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "restart_fails_total",
Help: "Total number of child process restart failures",
}),
childRunning: prometheus.NewCounter(prometheus.CounterOpts{ childRunning: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace, Namespace: namespace,
Subsystem: subsystem, Subsystem: subsystem,
...@@ -219,6 +233,8 @@ func initMetrics() { ...@@ -219,6 +233,8 @@ func initMetrics() {
prometheus.Register(metrics.pings) prometheus.Register(metrics.pings)
prometheus.Register(metrics.pingFails) prometheus.Register(metrics.pingFails)
prometheus.Register(metrics.panics) prometheus.Register(metrics.panics)
prometheus.Register(metrics.restarts)
prometheus.Register(metrics.restartFails)
prometheus.Register(metrics.childRunning) prometheus.Register(metrics.childRunning)
prometheus.Register(metrics.childNotRunning) prometheus.Register(metrics.childNotRunning)
prometheus.Register(metrics.vclDiscards) prometheus.Register(metrics.vclDiscards)
......
...@@ -41,6 +41,8 @@ const ( ...@@ -41,6 +41,8 @@ const (
pingErr = "PingFailure" pingErr = "PingFailure"
statusErr = "StatusFailure" statusErr = "StatusFailure"
statusNotRun = "StatusNotRunning" statusNotRun = "StatusNotRunning"
restartErr = "RestartFailure"
restart = "Restart"
panicErr = "PanicFailure" panicErr = "PanicFailure"
panic = "Panic" panic = "Panic"
vclListErr = "VCLListFailure" vclListErr = "VCLListFailure"
...@@ -118,6 +120,19 @@ func (vc *Controller) checkInst() bool { ...@@ -118,6 +120,19 @@ func (vc *Controller) checkInst() bool {
} else { } else {
metrics.childNotRunning.Inc() metrics.childNotRunning.Inc()
vc.warnEvt(statusNotRun, "Status: %s", state) vc.warnEvt(statusNotRun, "Status: %s", state)
// XXX configure whether to auto-start
if state == admin.Stopped {
vc.log.Debug("Attempting restart")
if err := adm.Start(); err != nil {
vc.warnEvt(restartErr, "Cannot restart: %v",
err)
metrics.restartFails.Inc()
return false
} else {
vc.infoEvt(restart, "Child restarted")
metrics.restarts.Inc()
}
}
} }
panic, err := adm.GetPanic() panic, err := adm.GetPanic()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment