Commit 6a50e7d2 authored by Geoff Simmons's avatar Geoff Simmons

Make removing a varnish instance more robust.

Non-timeout network errors are not fatal when an instance is deleted,
for the same reasons given for removing a haproxy instance in the
prior commit.

It is also not fatal if the admin Secret is missing when we remove
a varnish instance, or when setting its config to the NotReady VCL.
Similar to reasons given for haproxy in prior commits -- in such
cases we assume that Secret has been deleted in an undeployment
operation, so it's not necessary to set a "not configured" state,
since the instance will be removed imminently.
parent 255909d5
/*
* Copyright (c) 2024 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
// Package net includes common network code used by the Varnish and
// haproxy controllers.
package net
import (
"net"
"github.com/sirupsen/logrus"
)
// IsNonTimeoutNetErr reports whether err is a network error (one that
// implements net.Error) that is not a timeout. On syncs for deletion,
// the Pod may be already gone, so callers can treat such errors as
// non-fatal.
//
// It returns false when err is nil, is not a net.Error, or is a
// timeout. The error is logged at debug level on every call, and at
// warn level when true is returned.
func IsNonTimeoutNetErr(err error, log *logrus.Logger) bool {
	log.Debugf("checking error type %T: %+v", err, err)
	neterr, ok := err.(net.Error)
	// Timeouts are excluded: only non-timeout network errors qualify.
	if !ok || neterr.Timeout() {
		return false
	}
	log.Warnf("Non-timeout network error: %+v", err)
	return true
}
...@@ -36,7 +36,6 @@ package varnish ...@@ -36,7 +36,6 @@ package varnish
import ( import (
"fmt" "fmt"
"io" "io"
"net"
"reflect" "reflect"
"regexp" "regexp"
"strings" "strings"
...@@ -44,6 +43,7 @@ import ( ...@@ -44,6 +43,7 @@ import (
"time" "time"
"code.uplex.de/uplex-varnish/k8s-ingress/pkg/interfaces" "code.uplex.de/uplex-varnish/k8s-ingress/pkg/interfaces"
"code.uplex.de/uplex-varnish/k8s-ingress/pkg/net"
"code.uplex.de/uplex-varnish/k8s-ingress/pkg/update" "code.uplex.de/uplex-varnish/k8s-ingress/pkg/update"
"code.uplex.de/uplex-varnish/k8s-ingress/pkg/varnish/vcl" "code.uplex.de/uplex-varnish/k8s-ingress/pkg/varnish/vcl"
"code.uplex.de/uplex-varnish/varnishapi/pkg/admin" "code.uplex.de/uplex-varnish/varnishapi/pkg/admin"
...@@ -445,28 +445,20 @@ func (vc *Controller) setCfgLabel(inst *varnishInst, cfg, lbl string, ...@@ -445,28 +445,20 @@ func (vc *Controller) setCfgLabel(inst *varnishInst, cfg, lbl string,
return nil return nil
} }
// Ignore permanent network errors on syncs for deletion -- the
// Varnish instance may already be gone.
func (vc *Controller) ignorePermNetErr(err error) bool {
vc.log.Debugf("checking error type %T: %+v", err, err)
neterr, ok := err.(net.Error)
if !ok || !neterr.Temporary() {
return false
}
vc.log.Warnf("Ignoring permanent network error: %+v", err)
return true
}
// On Delete for a Varnish instance, we set it to the unready state. // On Delete for a Varnish instance, we set it to the unready state.
func (vc *Controller) removeVarnishInstances(insts []*varnishInst) error { func (vc *Controller) removeVarnishInstances(insts []*varnishInst) error {
var errs AdmErrors var errs AdmErrors
for _, inst := range insts { for _, inst := range insts {
// XXX health check for sharding config should fail if inst.admSecret == nil {
if err := vc.setCfgLabel(inst, notAvailCfg, readinessLabel, vc.log.Warnf("Removing Varnish instance %s: "+
true); err != nil { "admin secret is nil, cannot set the unready "+
"state, assuming the instance is deleted",
if vc.ignorePermNetErr(err) { inst.String())
} else if err := vc.setCfgLabel(inst, notAvailCfg,
readinessLabel, true); err != nil {
if net.IsNonTimeoutNetErr(err, vc.log) {
continue continue
} }
admErr := AdmError{inst: inst.String(), err: err} admErr := AdmError{inst: inst.String(), err: err}
...@@ -745,11 +737,17 @@ func (vc *Controller) SetNotReady(svcKey string) error { ...@@ -745,11 +737,17 @@ func (vc *Controller) SetNotReady(svcKey string) error {
var errs AdmErrors var errs AdmErrors
for _, inst := range svc.instances { for _, inst := range svc.instances {
if inst.admSecret == nil {
vc.log.Warnf("Varnish instance %s: admin secret is "+
"nil, cannot set the unready state, assuming "+
"the instance is deleted", inst.String())
continue
}
for _, label := range []string{readinessLabel, regularLabel} { for _, label := range []string{readinessLabel, regularLabel} {
if err := vc.setCfgLabel(inst, notAvailCfg, label, if err := vc.setCfgLabel(inst, notAvailCfg, label,
false); err != nil { false); err != nil {
if vc.ignorePermNetErr(err) { if net.IsNonTimeoutNetErr(err, vc.log) {
continue continue
} }
admErr := AdmError{ admErr := AdmError{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment