Commit 6a50e7d2 authored by Geoff Simmons

Make removing a varnish instance more robust.

Non-timeout network errors are not fatal when an instance is deleted,
for the same reasons given for removing a haproxy instance in the
prior commit.

It is also not fatal if the admin Secret is missing when we remove
a varnish instance, or when setting its config to the NotReady VCL.
Similar to reasons given for haproxy in prior commits -- in such
cases we assume that the Secret has been deleted in an undeployment
operation, so it's not necessary to set a "not configured" state,
since the instance will be removed imminently.
parent 255909d5
/*
* Copyright (c) 2024 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
// Package net includes common network code used by the Varnish and
// haproxy controllers.
package net
import (
"net"
"github.com/sirupsen/logrus"
)
// IsNonTimeoutNetErr reports whether err is a network error (one that
// implements net.Error) that is not a timeout. On syncs for deletion,
// the Pod may be already gone, so callers may choose to tolerate such
// errors.
//
// It returns false when err is not a net.Error, or when it is a
// net.Error whose Timeout method returns true. When it returns true,
// the error is logged at warning level on log. The error and its
// dynamic type are always logged at debug level.
func IsNonTimeoutNetErr(err error, log *logrus.Logger) bool {
	log.Debugf("checking error type %T: %+v", err, err)
	neterr, ok := err.(net.Error)
	// Anything that is not a network error, and any timeout, is not
	// a "non-timeout network error".
	if !ok || neterr.Timeout() {
		return false
	}
	log.Warnf("Non-timeout network error: %+v", err)
	return true
}
......@@ -36,7 +36,6 @@ package varnish
import (
"fmt"
"io"
"net"
"reflect"
"regexp"
"strings"
......@@ -44,6 +43,7 @@ import (
"time"
"code.uplex.de/uplex-varnish/k8s-ingress/pkg/interfaces"
"code.uplex.de/uplex-varnish/k8s-ingress/pkg/net"
"code.uplex.de/uplex-varnish/k8s-ingress/pkg/update"
"code.uplex.de/uplex-varnish/k8s-ingress/pkg/varnish/vcl"
"code.uplex.de/uplex-varnish/varnishapi/pkg/admin"
......@@ -445,28 +445,20 @@ func (vc *Controller) setCfgLabel(inst *varnishInst, cfg, lbl string,
return nil
}
// ignorePermNetErr reports whether err is a permanent network error,
// meaning one that implements net.Error and whose Temporary method
// returns false. Such errors are ignored on syncs for deletion -- the
// Varnish instance may already be gone.
//
// It returns false when err is not a net.Error, or when the error
// reports itself as temporary. When it returns true, the error is
// logged at warning level. The error and its dynamic type are always
// logged at debug level.
func (vc *Controller) ignorePermNetErr(err error) bool {
	vc.log.Debugf("checking error type %T: %+v", err, err)
	neterr, ok := err.(net.Error)
	// Errors that are not network errors, and temporary network
	// errors, must not be ignored.
	if !ok || neterr.Temporary() {
		return false
	}
	vc.log.Warnf("Ignoring permanent network error: %+v", err)
	return true
}
// On Delete for a Varnish instance, we set it to the unready state.
func (vc *Controller) removeVarnishInstances(insts []*varnishInst) error {
var errs AdmErrors
for _, inst := range insts {
// XXX health check for sharding config should fail
if err := vc.setCfgLabel(inst, notAvailCfg, readinessLabel,
true); err != nil {
if vc.ignorePermNetErr(err) {
if inst.admSecret == nil {
vc.log.Warnf("Removing Varnish instance %s: "+
"admin secret is nil, cannot set the unready "+
"state, assuming the instance is deleted",
inst.String())
} else if err := vc.setCfgLabel(inst, notAvailCfg,
readinessLabel, true); err != nil {
if net.IsNonTimeoutNetErr(err, vc.log) {
continue
}
admErr := AdmError{inst: inst.String(), err: err}
......@@ -745,11 +737,17 @@ func (vc *Controller) SetNotReady(svcKey string) error {
var errs AdmErrors
for _, inst := range svc.instances {
if inst.admSecret == nil {
vc.log.Warnf("Varnish instance %s: admin secret is "+
"nil, cannot set the unready state, assuming "+
"the instance is deleted", inst.String())
continue
}
for _, label := range []string{readinessLabel, regularLabel} {
if err := vc.setCfgLabel(inst, notAvailCfg, label,
false); err != nil {
if vc.ignorePermNetErr(err) {
if net.IsNonTimeoutNetErr(err, vc.log) {
continue
}
admErr := AdmError{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment