Commit 07d677aa authored by Geoff Simmons's avatar Geoff Simmons

Add set.hdr_filter().

This necessitates adding the matchonly() method to the vre2set
interface, which only checks for matches, and does not save the
indices of matching elements. It also requires a length parameter
(since headers in struct http are not null-terminated).

It also requires the return of the configure-time check for whether
the RE2::Set::Match() method can be called with a NULL vector for
the indices.
parent c7e8f60a
......@@ -67,6 +67,7 @@ SYNOPSIS
[, ENUM select])
BOOL <obj>.saved([ENUM {REGEX, STR, BE, INT} which] [, INT n]
[, ENUM select])
VOID <obj>.hdr_filter(HTTP [, BOOL])
# utility function
STRING re2.quotemeta(STRING)
......@@ -1769,6 +1770,50 @@ Example::
# and which=REGEX by default.
}
.. _vmod_re2.set.hdr_filter:
VOID xset.hdr_filter(HTTP, BOOL whitelist=1)
--------------------------------------------
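Filters the headers in the HTTP object, which may be one of ``req``,
``resp``, ``bereq``, or ``beresp``. In other words, filter the headers
in the client or backend request or response. Each header is matched
as a whole line of the form ``Name: value``, so patterns typically
begin with the header name followed by a colon, in a set created with
``anchor=start``.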
If ``whitelist`` is ``true``, then headers that match one of the
patterns in the set are retained, and all other headers are removed.
Otherwise, headers that match a pattern in the set are removed, and
all others are retained. By default, ``whitelist`` is ``true``.
Example::
sub vcl_init {
# Header whitelist
new white = re2.set(anchor=start);
white.add("Foo:");
white.add("Bar:");
white.add("Baz: baz$");
white.compile();
# Header blacklist
new black = re2.set(anchor=start);
black.add("Chaotic:");
black.add("Evil:");
black.add("Wicked: wicked$");
black.compile();
}
sub vcl_recv {
# Filter the client request header with the whitelist.
# Headers that do not match any pattern in the set are removed.
white.hdr_filter(req);
}
sub vcl_deliver {
# Filter the client response header with the blacklist.
# Headers that match any pattern in the set are removed.
black.hdr_filter(resp, false);
}
.. _vmod_re2.quotemeta:
STRING quotemeta(STRING, STRING fallback)
......
......@@ -70,13 +70,16 @@ AC_FUNC_REALLOC
AC_FUNC_ERROR_AT_LINE
AC_CHECK_HEADER_STDBOOL
# Check RE2 capabilities
AC_LANG(C++)
SAVE_CXXFLAGS="$CXXFLAGS"
SAVE_LDFLAGS="$LDFLAGS"
CXXFLAGS+=" -std=c++11"
LDFLAGS+=" -lre2"
# Check if the Set::Match() method supports error reporting, to notify
# if a match failed due to the DFA hitting the max_mem
# limit. Available since RE2 commit ee52f03, or since version
# 2017-12-01.
AC_LANG(C++)
SAVE_CXXFLAGS="$CXXFLAGS"
CXXFLAGS+=" -std=c++11"
AC_MSG_CHECKING([for RE2::Set::Match() with ErrorInfo])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <re2/set.h>]], [[
re2::RE2::Set s(re2::RE2::DefaultOptions, re2::RE2::UNANCHORED);
......@@ -89,7 +92,33 @@ s.Match("", NULL, NULL)]])],
AC_DEFINE([HAVE_SET_MATCH_ERRORINFO], [0],
[Define to 1 if RE2::Set::Match() has the ErrorInfo parameter])
])
# RE2 versions up to 2016-03-01 require a pointer to vector<int> in
# Set::Match(), to identify the regex that was matched. Since commit
# df7a2dc in re2, the pointer may be NULL, if we just want to know
# whether there was a match. This check tests for that feature.
# Note: the test may cause a core dump if it fails.
AC_MSG_CHECKING([for RE2::Set::Match() with NULL index vector])
AC_RUN_IFELSE(
[AC_LANG_SOURCE([[
#include "re2/set.h"
main() {
re2::RE2::Set s(re2::RE2::DefaultOptions, re2::RE2::UNANCHORED);
s.Add("", NULL);
s.Compile();
s.Match("", NULL);
}
]])],
[AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_SET_MATCH_NULL_VECTOR], [1],
[Define to 1 if RE2 Set::Match() permits a NULL vector])
],
[AC_MSG_RESULT([no])
AC_DEFINE([HAVE_SET_MATCH_NULL_VECTOR], [0],
[Define to 1 if RE2 Set::Match() permits a NULL vector])
])
CXXFLAGS="$SAVE_CXXFLAGS"
LDFLAGS="$SAVE_LDFLAGS"
AC_LANG(C)
# --enable-stack-protector
......
......@@ -693,3 +693,79 @@ vmod_set_saved(VRT_CTX, struct vmod_re2_set *set, VCL_ENUM whichs, VCL_INT n,
WRONG("illegal which ENUM");
return 0;
}
/* copied from Varnish cache_http.c */
static void
http_VSLH_del(const struct http *hp, unsigned hdr)
{
int i;
if (hp->vsl != NULL) {
/* We don't support unsetting stuff in the first line */
assert (hdr >= HTTP_HDR_FIRST);
AN(hp->vsl->wid & (VSL_CLIENTMARKER|VSL_BACKENDMARKER));
i = (HTTP_HDR_UNSET - HTTP_HDR_METHOD);
i += hp->logtag;
VSLbt(hp->vsl, (enum VSL_tag_e)i, hp->hd[hdr]);
}
}
VCL_VOID
vmod_set_hdr_filter(VRT_CTX, struct VPFX(re2_set) *set, VCL_HTTP hp,
VCL_BOOL whitelist)
{
int match = 0;
const char *err;
errorkind_e errkind = NO_ERROR;
uint16_t u, v;
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
CHECK_OBJ_NOTNULL(set, VMOD_RE2_SET_MAGIC);
CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
for (v = u = HTTP_HDR_FIRST; u < hp->nhd; u++) {
const char *hdr;
unsigned len;
Tcheck(hp->hd[u]);
hdr = hp->hd[u].b;
len = Tlen(hp->hd[u]);
if ((err = vre2set_matchonly(set->set, hdr, len, &match,
&errkind))
!= NULL) {
VERR(ctx, "%s.hdr_filter(%.*s): %s", set->vcl_name, len,
hdr, err);
goto loop;
}
switch(errkind) {
case NO_ERROR:
case NOT_IMPLEMENTED:
break;
case OUT_OF_MEMORY:
VERR(ctx, "%s.hdr_filter(%.*s): RE2 lib indicates "
"out-of-memory during match, consider increasing "
"max_mem", set->vcl_name, len, hdr);
goto loop;
case NOT_COMPILED:
case INCONSISTENT:
default:
WRONG("impossible or invalid error kind");
}
/* cf. Varnish http_Unset() */
/* !a != !b <=> a XOR b */
if (!whitelist != !match) {
http_VSLH_del(hp, u);
continue;
}
if (v != u) {
memcpy(&hp->hd[v], &hp->hd[u], sizeof *hp->hd);
memcpy(&hp->hdf[v], &hp->hdf[u], sizeof *hp->hdf);
}
loop:
v++;
}
hp->nhd = v;
}
# looks like -*- vcl -*-
varnishtest "set.hdr_filter()"
varnish v1 -vcl {
import ${vmod_re2};
backend be { .host = "${bad_ip}"; }
sub vcl_init {
new white = re2.set(anchor=start);
white.add("Foo:");
white.add("Bar:");
white.add("Baz: baz$");
white.compile();
new black = re2.set(anchor=start);
black.add("Chaotic:");
black.add("Evil:");
black.add("Wicked: wicked$");
black.compile();
}
sub vcl_recv {
set req.http.Foo = "foo";
set req.http.Bar = "bar";
set req.http.Baz = "baz";
set req.http.Quux = "quux";
white.hdr_filter(req);
return (synth(200));
}
sub vcl_synth {
set resp.http.Foo = req.http.Foo;
set resp.http.Bar = req.http.Bar;
set resp.http.Baz = req.http.Baz;
set resp.http.Quux = req.http.Quux;
set resp.http.Chaotic = "chaotic";
set resp.http.Evil = "evil";
set resp.http.Wicked = "wicked";
black.hdr_filter(resp, false);
return (deliver);
}
} -start
client c1 {
txreq
rxresp
expect resp.status == 200
expect resp.http.Foo == "foo"
expect resp.http.Bar == "bar"
expect resp.http.Baz == "baz"
expect resp.http.Quux == ""
expect resp.http.Chaotic == <undef>
expect resp.http.Evil == <undef>
expect resp.http.Wicked == <undef>
} -run
logexpect l1 -v v1 -d 1 -g vxid -q "ReqUnset" {
expect 0 * Begin req
expect * = ReqUnset "^Quux: quux$"
expect * = RespUnset "^Chaotic: chaotic$"
expect * = RespUnset "^Evil: evil$"
expect * = RespUnset "^Wicked: wicked$"
expect * = End
} -run
......@@ -63,6 +63,7 @@ SYNOPSIS
[, ENUM select])
BOOL <obj>.saved([ENUM {REGEX, STR, BE, INT} which] [, INT n]
[, ENUM select])
VOID <obj>.hdr_filter(HTTP [, BOOL])
# utility function
STRING re2.quotemeta(STRING)
......@@ -1469,6 +1470,47 @@ Example::
# and which=REGEX by default.
}
$Method VOID .hdr_filter(HTTP, BOOL whitelist=1)
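Filters the headers in the HTTP object, which may be one of ``req``,
``resp``, ``bereq``, or ``beresp``. In other words, filter the headers
in the client or backend request or response. Each header is matched
as a whole line of the form ``Name: value``, so patterns typically
begin with the header name followed by a colon, in a set created with
``anchor=start``.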
If ``whitelist`` is ``true``, then headers that match one of the
patterns in the set are retained, and all other headers are removed.
Otherwise, headers that match a pattern in the set are removed, and
all others are retained. By default, ``whitelist`` is ``true``.
Example::
sub vcl_init {
# Header whitelist
new white = re2.set(anchor=start);
white.add("Foo:");
white.add("Bar:");
white.add("Baz: baz$");
white.compile();
# Header blacklist
new black = re2.set(anchor=start);
black.add("Chaotic:");
black.add("Evil:");
black.add("Wicked: wicked$");
black.compile();
}
sub vcl_recv {
# Filter the client request header with the whitelist.
# Headers that do not match any pattern in the set are removed.
white.hdr_filter(req);
}
sub vcl_deliver {
# Filter the client response header with the blacklist.
# Headers that match any pattern in the set are removed.
black.hdr_filter(resp, false);
}
$Function STRING quotemeta(STRING,
STRING fallback="**QUOTEMETA FUNCTION FAILED**")
......
......@@ -87,6 +87,37 @@ vre2set::match(const char* subject, vector<int>* m, errorkind_e* err) const
#endif
}
inline bool
vre2set::matchonly(const char* subject, const unsigned len, errorkind_e* err)
const
{
StringPiece s(subject, static_cast<int>(len));
#if HAVE_SET_MATCH_ERRORINFO && HAVE_SET_MATCH_NULL_VECTOR
bool ret;
RE2::Set::ErrorInfo errinfo;
ret = set_->Match(s, NULL, &errinfo);
*err = (errorkind_e) errinfo.kind;
return ret;
#elif HAVE_SET_MATCH_ERRORINFO
bool ret;
RE2::Set::ErrorInfo errinfo;
vector<int> v;
ret = set_->Match(s, &v, &errinfo);
*err = (errorkind_e) errinfo.kind;
return ret;
#elif HAVE_SET_MATCH_NULL_VECTOR
(void)err;
return set_->Match(s, NULL);
#else
vector<int> v;
(void)err;
return set_->Match(s, &v);
#endif
}
const char *
vre2set_init(vre2set **setp, anchor_e anchor, unsigned utf8,
unsigned posix_syntax, unsigned longest_match, long max_mem,
......@@ -180,6 +211,17 @@ vre2set_match(vre2set *set, const char * const subject, int * const match,
CATCHALL
}
const char *
vre2set_matchonly(vre2set *set, const char * const subject, const unsigned len,
int * const match, errorkind_e * const err)
{
try {
*match = set->matchonly(subject, len, err);
return NULL;
}
CATCHALL
}
const char *
vre2set_fini(vre2set **set)
{
......
......@@ -57,6 +57,8 @@ public:
bool compile() const;
bool match(const char* subject, std::vector<int>* m,
errorkind_e* err) const;
bool matchonly(const char* subject, const unsigned len,
errorkind_e* err) const;
};
#else
typedef struct vre2set vre2set;
......@@ -86,6 +88,10 @@ extern "C" {
int * const match, void *buf,
const size_t buflen, size_t * const nmatches,
errorkind_e * const err);
const char *vre2set_matchonly(vre2set *set, const char *subject,
const unsigned len, int * const match,
errorkind_e * const err);
#ifdef __cplusplus
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment