varnishstatdiff: New utility to compare metrics

As I was comparing the output of two varnishstat executions that were captured after a fresh start of varnishd followed by the workloads to compare, I realized diff(1) was giving me a hard time, and git-diff(1) barely improved the situation. Looking for generic command line utilities to compare metrics, I wasn't able to find anything. So instead I designed an output format that I thought would help me spot interesting differences: a format inspired by the unified diff, with a twist. I wanted metrics to be vertically aligned, to easily see differences in orders of magnitude, and the noise reduced to a minimum. In the end, taking this detour to script varnishstatdiff sped up my research. This should hopefully be portable to POSIX systems.

varnishstatdiff: New utility to compare metrics
As I was comparing the output of two varnishstat executions that were captured after a fresh start of varnishd followed by the workloads to compare, I realized diff(1) was giving me a hard time, and git-diff(1) barely improved the situation. Looking for generic command line utilities to compare metrics, I wasn't able to find anything. So instead I designed an output format that I thought would help me spot interesting differences: a format inspired by the unified diff, with a twist. I wanted metrics to be vertically aligned, to easily see differences in orders of magnitude, and the noise reduced to a minimum. In the end, taking this detour to script varnishstatdiff sped up my research. This should hopefully be portable to POSIX systems.
3d0a72c8 · Dridi Boukelmoune · 5b06c845 · 3d0a72c8 · 3d0a72c8 · 3d0a72c8
Commit 3d0a72c8 authored Jul 11, 2022 by Dridi Boukelmoune
Showing with 240 additions and 0 deletions

configure.ac configure.ac +4 -0

Makefile.am contrib/Makefile.am +7 -0

statdiff_b00000.vtc contrib/tests/statdiff_b00000.vtc +54 -0

varnishstatdiff contrib/varnishstatdiff +175 -0

No files found.
--- a/configure.ac
+++ b/configure.ac
@@ -932,6 +932,10 @@ AC_ARG_WITH([contrib],
 AM_CONDITIONAL([WITH_CONTRIB], [test "$with_contrib" = yes])

 AM_COND_IF([WITH_CONTRIB], [
+	CONTRIB_TESTS="$(cd $srcdir/contrib && echo tests/*.vtc)"
+	AC_SUBST(CONTRIB_TESTS)
+	AM_SUBST_NOTMAKE(CONTRIB_TESTS)
+
 	AC_DEFINE([WITH_CONTRIB], [1],
 		[Define to 1 when Varnish is built with contributions.])


--- a/contrib/Makefile.am
+++ b/contrib/Makefile.am
 #

+# Scripts installed in bindir and shipped in the distribution tarball.
+dist_bin_SCRIPTS = \
+	varnishstatdiff

+# Test cases; configure substitutes the result of its tests/*.vtc glob.
+TESTS = @CONTRIB_TESTS@
+
+# NOTE(review): vtc.am is not visible here, presumably it provides the
+# rules needed to run the .vtc test cases -- confirm.
+include $(top_srcdir)/vtc.am
+
+# Ship the test cases in the distribution tarball too.
+EXTRA_DIST = $(TESTS)
--- a/contrib/tests/statdiff_b00000.vtc
+++ b/contrib/tests/statdiff_b00000.vtc
+varnishtest "varnishstatdiff coverage"
+
+# External commands required by this test case.
+# NOTE(review): column(1) is used neither below nor by the varnishstatdiff
+# script added in this change -- confirm the requirement is still needed.
+feature cmd "command -v column"
+feature cmd "command -v diff"
+
+server s1 {
+	rxreq
+	txresp
+} -start
+
+varnish v1 -vcl+backend "" -start
+
+# First snapshot, taken before any client traffic. MAIN.client_req is
+# only selected here, so it is expected as a removal in the comparison.
+shell {
+	varnishstat -n ${v1_name} -1 \
+		-I MAIN.n_object -I MAIN.cache_* -I MAIN.client_req |
+	tee stat1.txt
+}
+
+# A single request, to move some of the selected metrics.
+client c1 {
+	txreq
+	rxresp
+} -start
+
+varnish v1 -vsl_catchup
+
+# Second snapshot. MAIN.esi_req is only selected here, so it is expected
+# as an addition in the comparison.
+shell {
+	varnishstat -n ${v1_name} -1 \
+		-I MAIN.n_object -I MAIN.cache_* -I MAIN.esi_req |
+	tee stat2.txt
+}
+
+# Help output and argument count errors.
+shell -expect Usage: {varnishstatdiff -h}
+shell -expect "Error: not enough arguments" -err {varnishstatdiff}
+shell -expect "Error: not enough arguments" -err {varnishstatdiff a}
+shell -expect "Error: too many arguments" -err {varnishstatdiff a b c}
+
+# Actual comparison of the two snapshots.
+shell {
+	varnishstatdiff stat1.txt stat2.txt | tee diff.txt
+}
+
+# In the expected output below, a leading @ stands in for the leading
+# space of lines that are neither additions nor removals; sed turns the
+# first @ of each line back into a space before the comparison.
+shell {
+	sed 's/@/ /' >expected.txt <<-EOF
+	--- stat1.txt
+	+++ stat2.txt
+	@MAIN.cache_miss  -0  -0.00  Cache misses
+	@                 +1  +0.00
+	-MAIN.client_req   0   0.00  Good client requests received
+	+MAIN.esi_req      0   0.00  ESI subrequests
+	@MAIN.n_object    -0   .     object structs made
+	@                 +1   .
+	EOF
+
+	diff -u expected.txt diff.txt
+}
--- a/contrib/varnishstatdiff
+++ b/contrib/varnishstatdiff
+#!/bin/sh
+#
+# Copyright (c) 2022 Varnish Software AS
+# All rights reserved.
+#
+# Author: Dridi Boukelmoune <dridi.boukelmoune@gmail.com>
+#
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Abort on the first failing command and on any use of an unset variable.
+set -e
+set -u
+
+readonly SCRIPT=$0
+
+# The assignment is kept separate from readonly: the exit status of
+# "readonly TMP=$(...)" is the one of readonly itself, which would hide
+# a mktemp failure from set -e and leave TMP empty.
+TMP=$(mktemp -d)
+readonly TMP
+
+# Remove the scratch directory on every exit path. TMP is quoted so that
+# a temporary directory path containing spaces is removed as a whole.
+trap 'rm -rf -- "$TMP"' EXIT
+
+usage() {
+	# Print the help text, preceded by an error message when exactly one
+	# argument is given, then exit. The exit status is the number of
+	# arguments: 0 for plain help, 1 when an error is reported. Sending
+	# the output to stderr is left to the caller.
+	if test $# -eq 1
+	then
+		printf 'Error: %s.\n\n' "$1"
+	fi
+
+	# In the help text a leading @ stands in for a leading space, sed
+	# turns the first @ of each line back into a space.
+	sed 's:@: :' <<-EOF
+	Usage: $SCRIPT <file1> <file2>
+	       $SCRIPT -h
+
+	Show the differences between two sets of varnish metrics extracted
+	with 'varnishstat -1'.
+
+	Available options:
+	-h : show this help and exit
+
+	Considering the following metrics in <file1>:
+
+	FOO.counter    123     12    Only in file 1
+	BAR.counter    456     45    Counter present in both files
+	BAR.gauge      999     .     Gauge present in both files
+
+	And the following metrics in <file2>:
+
+	BAR.counter    789     79    Counter present in both files
+	BAR.gauge      555     .     Gauge present in both files
+	BAZ.gauge        0     .     Only in file 2
+
+	The output is sorted by metric name and looks like this:
+
+	--- <file1>
+	+++ <file2>
+	@BAR.counter  -456  -45  Counter present in both files
+	@             +789  +79
+	@BAR.gauge    -999   .   Gauge present in both files
+	@             +555   .
+	+BAZ.gauge     0     .   Only in file 2
+	-FOO.counter   123   12  Only in file 1
+
+	The output looks like a unified diff except that when metrics are
+	present in both files, the diff is rendered as such only in the
+	metrics columns.
+	EOF
+	exit $#
+}
+
+join_prepare() {
+	# Print the varnishstat -1 output found in file $1 in a form suitable
+	# for join(1): sorted by metric name, with the first three runs of
+	# spaces of each line turned into @ delimiters.
+	#
+	# NB: the metrics need to be sorted to later be joined, and since
+	# the metrics descriptions contain spaces, using a delimiter other
+	# than space solves the problem. Hopefully @ never shows up in the
+	# varnishstat -1 output.
+	#
+	# The -- keeps a file name starting with a dash from being taken for
+	# a sort(1) option, like join_render already does for join(1).
+	#
+	# NOTE(review): the status of this pipeline is the one of sed, so a
+	# sort(1) failure is not reported to the caller.
+	sort -k 1b,1 -- "$1" |
+	sed 's:  *:@: ; s:  *:@: ; s:  *:@:'
+}
+
+join_render() {
+	# Join the prepared files $1 and $2 on the metric name, keeping the
+	# metrics that are present in only one of them, and print one
+	# @-delimited record per metric with the following fields:
+	#
+	#   1  metric name
+	#   2  value in file 1        4  value in file 2
+	#   3  rate in file 1         5  rate in file 2
+	#   6  description in file 1  7  description in file 2
+	join -t@ -a1 -a2 \
+		-o '0 1.2 1.3 2.2 2.3 1.4 2.4' \
+		-- "$1" "$2"
+}
+
+diff_preamble() {
+	# Print the two header lines of the diff: file 1 ($1) after the ---
+	# marker and file 2 ($2) after the +++ marker. The format is reused
+	# by printf for the second pair of arguments. Presumably the markers
+	# are passed as arguments so that the format never starts with a
+	# dash.
+	printf '%s %s\n' --- "$1" +++ "$2"
+}
+
+diff_measure() {
+	# Read joined records from stdin and print three column widths on a
+	# single line: metric name, value and rate. Only the records with a
+	# value or a rate differing between both files are measured, the
+	# value and rate columns are sized for the widest of both files, and
+	# 2 is added to each width.
+	awk -F@ '
+	function widen(col, len) {
+		if (width[col] < len)
+			width[col] = len
+	}
+	BEGIN {
+		width[1] = 0
+		width[2] = 0
+		width[3] = 0
+	}
+	$2 != $4 || $3 != $5 {
+		widen(1, length($1))
+		widen(2, length($2))
+		widen(2, length($4))
+		widen(3, length($3))
+		widen(3, length($5))
+	}
+	END {
+		printf "%d %d %d\n", width[1] + 2, width[2] + 2, width[3] + 2
+	}
+	'
+}
+
+diff_render() {
+	# Render the joined records found in file $1, using the three column
+	# widths read from the first line of stdin. Metrics with the same
+	# value and rate in both files are left out. A rate of "." gets a
+	# space instead of a sign.
+	read l1 l2 l3
+	awk -F@ -v l1="$l1" -v l2="$l2" -v l3="$l3" '
+	function mark(rate, sign) {
+		return (rate == ".") ? " " : sign
+	}
+	$2 == "" && $4 == "" { # in neither file, nothing to show
+		next
+	}
+	$4 == "" { # only in file 1
+		printf "-%-*s %-*s %-*s%s\n", l1, $1, l2, $2, l3, $3, $6
+		next
+	}
+	$2 == "" { # only in file 2
+		printf "+%-*s %-*s %-*s%s\n", l1, $1, l2, $4, l3, $5, $7
+		next
+	}
+	$2 != $4 || $3 != $5 { # present in both with a difference
+		printf " %-*s-%-*s%s%-*s%s\n",
+		    l1, $1, l2, $2, mark($3, "-"), l3, $3, $6
+		printf " %-*s+%-*s%s%s\n",
+		    l1, "", l2, $4, mark($5, "+"), $5
+	}
+	' <"$1"
+}
+
+# Parse the command line: -h is the only option, anything else is
+# reported as an error along with the help text on stderr.
+while getopts h OPT
+do
+	case $OPT in
+	h) usage ;;
+	*) usage "wrong usage" >&2 ;;
+	esac
+done
+
+shift $((OPTIND - 1))
+
+test $# -lt 2 && usage "not enough arguments" >&2
+test $# -gt 2 && usage "too many arguments" >&2
+
+# The status of the sort | sed pipeline in join_prepare is the one of
+# sed, so an unreadable file would go through as if it were empty and
+# the script would still succeed. Refuse such files up front. A lone
+# dash is let through since sort(1) reads its standard input for it.
+for FILE in "$1" "$2"
+do
+	test "$FILE" = - || test -r "$FILE" ||
+	usage "cannot read $FILE" >&2
+done
+
+# sort(1) and join(1) need to agree on the collation order.
+# NOTE(review): POSIX only guarantees the C and POSIX locale names,
+# C.utf-8 may be missing on some systems -- confirm the intent.
+export LC_ALL=C.utf-8
+
+# Normalize both files, join them on the metric name, then print the
+# diff with columns sized by diff_measure.
+join_prepare "$1" >"$TMP"/1
+join_prepare "$2" >"$TMP"/2
+join_render "$TMP"/1 "$TMP"/2 >"$TMP"/join
+diff_preamble "$1" "$2"
+diff_measure <"$TMP"/join |
+diff_render "$TMP"/join