Commit af817567 authored by Poul-Henning Kamp

Add "regsub" support for string manipulation.

Notice this facility is subject to change!

"regsub" is short for regular expression substitution and it is probably
easiest to explain with some examples:

	sub vcl_recv {
		set req.url = regsub(req.url, "#.*", "");
	}

This will replace the request's URL with the output of the regsub() function.

regsub() takes three arguments: the string to be examined, a regular
expression and a replacement string.

In this case, everything after the first '#' is removed (replaced
with nothing).

The replacement string recognizes the following magic sequences:
	&	- insert everything matched by the regexp
	$0	- ditto.
	$1	- replace with the first submatch of the regexp
	$2	- replace with the second submatch of the regexp
	...
	$9	- replace with the ninth submatch of the regexp

(The $0..$9 syntax was chosen over the \0...\9 syntax in order to avoid
a nightmare of escape characters in the VCL source code.  Arguments and
suggestions are welcome).

A more advanced example:

	set bereq.http.ClientIP = regsub(client.ip, "(.*):(.*)", "$2 $1");

The client.ip variable expands to IP:port number, for instance
	127.0.0.1:54662

The regular expression "(.*):(.*)" results in the following matches:
	& + $0		"127.0.0.1:54662"
	$1		"127.0.0.1"
	$2		"54662"

So the replacement string "$2 $1" results in "54662 127.0.0.1"

And the completed header which is sent to the backend will look like:

	"ClientIP: 54662 127.0.0.1"

An even more advanced example would be:

    set bereq.http.magic = "Client IP = " regsub(client.ip, ":", " port = ");

Where we also exploit the string concatenation ability of the "set" statement.

The result string is built in the request workspace, so you may need
to increase the workspace size if you do a lot of regsub()'s.

Currently there is no decent error handling for running out of workspace.


git-svn-id: http://www.varnish-cache.org/svn/trunk/varnish-cache@1667 d4fa192b-c00b-0410-8231-f00ffab90ce4
parent 0790a439
......@@ -35,6 +35,7 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <regex.h>
......@@ -100,13 +101,72 @@ VRT_re_test(struct vsb *sb, const char *re, int sub)
return (1);
}
/*
 * VRT_regsub -- replace the first match of a compiled regex in 'str'.
 *
 * NOTE(review): this span is a diff shown without its +/- markers, so
 * removed and added lines are interleaved.  Presumably the "char *"
 * return type and the static "FOO" stub (through "return (foo);") are
 * the removed lines, and "const char *" plus everything from the
 * regmatch_t declaration onward is the new implementation -- confirm
 * against the repository.
 *
 * sp	session; the result is built in sp->http->ws
 * str	subject string
 * re	compiled pattern, used as a regex_t *
 * sub	replacement template: '&' and "$0" insert the whole match,
 *	"$1".."$9" insert the corresponding submatch, any other
 *	character is copied literally
 *
 * Returns 'str' itself when the pattern does not match, otherwise a
 * pointer to the newly built string in the workspace.
 */
char *
const char *
VRT_regsub(struct sess *sp, const char *str, void *re, const char *sub)
{
static char foo[4] = "FOO";
(void)sp;
(void)str;
(void)re;
(void)sub;
return (foo);
regmatch_t pm[10];
regex_t *t;
int i, l;
char *b, *p, *e;
unsigned u, x;
AN(re);
t = re;
/* Ten slots: pm[0] is the whole match, pm[1]..pm[9] the submatches */
i = regexec(t, str, 10, pm, 0);
/* If it didn't match, we can return the original string */
if (i == REG_NOMATCH)
return(str);
/*
 * NOTE(review): any other non-zero regexec() result is not checked
 * and falls through to the substitution code below.
 */
/*
 * [b, e) is the output buffer: b is the workspace's 'f' pointer and
 * u is the size returned by WS_Reserve().  Presumably a 0 argument
 * asks for all remaining workspace -- confirm against WS_Reserve().
 */
u = WS_Reserve(sp->http->ws, 0);
e = p = b = sp->http->ws->f;
e += u;
/*
 * From here on, p is advanced even when a copy is skipped for lack of
 * room, so p keeps tracking the space the full result needs and an
 * overflow can be detected at the end.
 */
/* Copy prefix to match */
if (pm[0].rm_so > 0) {
if (p + pm[0].rm_so < e)
memcpy(p, str, pm[0].rm_so);
p += pm[0].rm_so;
}
/* Expand the replacement template one character at a time */
for ( ; *sub != '\0'; sub++ ) {
if (*sub == '&') {
/* '&': insert the text of the whole match */
l = pm[0].rm_eo - pm[0].rm_so;
if (l > 0) {
if (p + l < e)
memcpy(p, str + pm[0].rm_so, l);
p += l;
}
/*
 * "$N": insert submatch N and consume the digit.  Per POSIX, a group
 * that did not take part in the match has rm_so == rm_eo == -1, so l
 * is 0 and nothing is inserted.
 * NOTE(review): isdigit() is handed a plain char; the portable form
 * casts the argument to unsigned char.
 */
} else if (*sub == '$' && isdigit(sub[1])) {
x = sub[1] - '0';
sub++;
l = pm[x].rm_eo - pm[x].rm_so;
if (l > 0) {
if (p + l < e)
memcpy(p, str + pm[x].rm_so, l);
p += l;
}
} else {
/* Any other character is copied literally */
if (p + 1 < e)
*p = *sub;
p++;
}
}
/* Copy suffix to match */
l = strlen(str + pm[0].rm_eo);
if (l > 0) {
if (p + l < e)
memcpy(p, str + pm[0].rm_eo, l);
p += l;
}
/*
 * Terminate the result; p only moves past the NUL if it was written.
 * NOTE(review): all guards use a strict '<'.  If a copy or the NUL is
 * skipped while p lands at or just below e, the p <= e check below
 * still passes and the returned buffer can be incomplete or
 * unterminated -- verify.
 */
if (p + 1 < e)
*p++ = '\0';
/*
 * NOTE(review): this assert runs before the p > e fallback; if
 * xxxassert() aborts on failure (not visible here), the fallback
 * below is never reached.
 */
xxxassert(p <= e);
if (p > e) {
/* Did not fit: release with size 0 and return the unmodified input */
WS_Release(sp->http->ws, 0);
return (str);
}
/* Release the reservation, passing the number of bytes used (p - b) */
WS_Release(sp->http->ws, p - b);
return (b);
}
......@@ -68,7 +68,7 @@ void VRT_re_init(void **, const char *, int sub);
void VRT_re_fini(void *);
int VRT_re_match(const char *, void *re);
int VRT_re_test(struct vsb *, const char *, int sub);
/*
 * NOTE(review): diff rendering without +/- markers -- the two
 * VRT_regsub lines are presumably the old (char *) and the new
 * (const char *) prototype; only one of them should exist in the
 * actual header.  Confirm against the repository.
 */
char *VRT_regsub(struct sess *sp, const char *, void *, const char *);
const char *VRT_regsub(struct sess *sp, const char *, void *, const char *);
void VRT_count(struct sess *, unsigned);
int VRT_rewrite(const char *, const char *);
......
......@@ -424,7 +424,7 @@ vcl_output_lang_h(struct vsb *sb)
vsb_cat(sb, "void VRT_re_fini(void *);\n");
vsb_cat(sb, "int VRT_re_match(const char *, void *re);\n");
vsb_cat(sb, "int VRT_re_test(struct vsb *, const char *, int sub);\n");
vsb_cat(sb, "char *VRT_regsub(struct sess *sp, const char *, void *, const char *);\n");
vsb_cat(sb, "const char *VRT_regsub(struct sess *sp, const char *, void *, const char *);\n");
vsb_cat(sb, "\n");
vsb_cat(sb, "void VRT_count(struct sess *, unsigned);\n");
vsb_cat(sb, "int VRT_rewrite(const char *, const char *);\n");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment