Support string escaping options:

- none: "as is"
- minimal: minimal JSON escaping
- ascii: minimal escaping plus all non-ASCII UTF-8 characters as \uHHHH
parent 593eb73d
...@@ -35,7 +35,7 @@ AM_TESTS_ENVIRONMENT = \ ...@@ -35,7 +35,7 @@ AM_TESTS_ENVIRONMENT = \
PATH="$(abs_builddir):$(VARNISH_TEST_PATH):$(PATH)" \ PATH="$(abs_builddir):$(VARNISH_TEST_PATH):$(PATH)" \
LD_LIBRARY_PATH="$(VARNISH_LIBRARY_PATH)" LD_LIBRARY_PATH="$(VARNISH_LIBRARY_PATH)"
TEST_EXTENSIONS = .vtc TEST_EXTENSIONS = .vtc
VTC_LOG_COMPILER = varnishtest -vl VTC_LOG_COMPILER = varnishtest -vl -t 10
AM_VTC_LOG_FLAGS = \ AM_VTC_LOG_FLAGS = \
-p vcl_path="$(abs_top_srcdir)/vcl:$(VARNISHAPI_VCLDIR)" \ -p vcl_path="$(abs_top_srcdir)/vcl:$(VARNISHAPI_VCLDIR)" \
-p vmod_path="$(abs_builddir)/.libs:$(vmoddir):$(VARNISHAPI_VMODDIR)" -p vmod_path="$(abs_builddir)/.libs:$(vmoddir):$(VARNISHAPI_VMODDIR)"
......
...@@ -97,11 +97,12 @@ static const unsigned char jm_string[2] AL = { JM_string, '"' }; ...@@ -97,11 +97,12 @@ static const unsigned char jm_string[2] AL = { JM_string, '"' };
static const unsigned char jnil_object[4] AL = { JM_object, '{','}', 0 }; static const unsigned char jnil_object[4] AL = { JM_object, '{','}', 0 };
static const unsigned char jnil_array[4] AL = { JM_array, '[',']', 0 }; static const unsigned char jnil_array[4] AL = { JM_array, '[',']', 0 };
static const unsigned char jnil_string[4] AL = { JM_string, '"','"', 0 };
static const unsigned char jnull[6] AL = { JM_lit, 'n','u','l','l', 0}; static const unsigned char jnull[6] AL = { JM_lit, 'n','u','l','l', 0};
static const unsigned char jtrue[6] AL = { JM_lit, 't','r','u','e', 0}; static const unsigned char jtrue[6] AL = { JM_lit, 't','r','u','e', 0};
static const unsigned char jfalse[7] AL = { JM_lit, 'f','a','l','s','e', 0}; static const unsigned char jfalse[7] AL = { JM_lit, 'f','a','l','s','e', 0};
#define VSB_mcat(vsb, x) AZ(VSB_bcat(vsb, x, sizeof(x))) #define VSB_mcat(vsb, x) (void)VSB_bcat(vsb, x, sizeof(x))
#define isJ(p, X) (((uintptr_t)(p) & 1) == 1 && \ #define isJ(p, X) (((uintptr_t)(p) & 1) == 1 && \
memcmp((unsigned char *)(p) - 1, jm_ ## X, sizeof(jm_ ## X)) == 0) memcmp((unsigned char *)(p) - 1, jm_ ## X, sizeof(jm_ ## X)) == 0)
...@@ -242,54 +243,128 @@ vmod_number(VRT_CTX, struct VARGS(number)*a) ...@@ -242,54 +243,128 @@ vmod_number(VRT_CTX, struct VARGS(number)*a)
// on %.15g: https://stackoverflow.com/questions/30658919/the-precision-of-printf-with-specifier-g/54162486#54162486 // on %.15g: https://stackoverflow.com/questions/30658919/the-precision-of-printf-with-specifier-g/54162486#54162486
WS_VSB_new(vsb, ctx->ws); WS_VSB_new(vsb, ctx->ws);
AZ(VSB_putc(vsb, JM_number)); (void) VSB_putc(vsb, JM_number);
if (a->valid_string) if (a->valid_string)
AZ(VSB_bcat(vsb, a->string, (ssize_t)strlen(a->string))); (void)VSB_bcat(vsb, a->string, (ssize_t)strlen(a->string));
else if (valid_i) else if (valid_i)
AZ(VSB_printf(vsb, "%jd", i)); (void)VSB_printf(vsb, "%jd", i);
else if (valid_d) else if (valid_d)
AZ(VSB_printf(vsb, "%.15g", d)); (void)VSB_printf(vsb, "%.15g", d);
else else
WRONG("valid_X"); WRONG("valid_X");
p = WS_VSB_finish(vsb, ctx->ws, NULL); p = WS_VSB_finish(vsb, ctx->ws, NULL);
if (p == NULL) { if (p == NULL) {
VRT_fail(ctx, "j.number(): our of workspace"); VRT_fail(ctx, "j.number(): out of workspace");
return (NULL); return (NULL);
} }
AZ((uintptr_t)p & 1); AZ((uintptr_t)p & 1);
return (p + 1); return (p + 1);
} }
// add json string
static void static void
vsbjstring(struct vsb *vsb, const char *p) vsbjstring(struct vsb *vsb, const char *pa)
{ {
AZ(VSB_putc(vsb, '"')); (void) VSB_putc(vsb, '"');
VSB_quote(vsb, p, -1, VSB_QUOTE_JSON); (void) vsbjminimal(vsb, pa);
AZ(VSB_putc(vsb, '"')); (void) VSB_putc(vsb, '"');
} }
VCL_STRING VCL_STRING
vmod_string(VRT_CTX, VCL_STRANDS s) vmod_string(VRT_CTX, VCL_STRANDS s, VCL_ENUM esc)
{ {
struct strands sc;
struct vsb vsb[1]; struct vsb vsb[1];
const char *p; const char *p, *e;
ssize_t sl;
size_t l;
int i; int i;
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC); CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
AN(s); AN(s);
// find first character
for (i = 0; i < s->n; i++) {
if (s->p[i] != NULL && *s->p[i] != '\0')
break;
}
if (i == s->n)
return ((const char *)jnil_string + 1);
const char *scp[s->n];
memset(scp, 0, sizeof scp);
sc.n = 0;
sc.p = scp;
if (esc == VENUM(none)) {
// skip initial quote
if (strcmp(s->p[i], "\"") == 0) {
i++;
if (i == s->n)
return ((const char *)jnil_string + 1);
}
else if (s->p[i][0] == '"')
sc.p[sc.n++] = s->p[i++] + 1;
}
for (; i < s->n; i++)
if (s->p[i] != NULL && *(s->p[i]) != '\0')
sc.p[sc.n++] = s->p[i];
WS_VSB_new(vsb, ctx->ws); WS_VSB_new(vsb, ctx->ws);
VSB_mcat(vsb, jm_string); // " VSB_mcat(vsb, jm_string); // "
for (i = 0; i < s->n; i++)
VSB_quote(vsb, s->p[i], -1, VSB_QUOTE_JSON); if (esc == VENUM(none)) {
AZ(VSB_putc(vsb, '"')); for (i = 0; i < sc.n - 1; i++)
(void)VSB_bcat(vsb, sc.p[i], strlen(sc.p[i]));
assert(i == sc.n - 1);
l = strlen(sc.p[i]);
AN(l);
if (sc.p[i][l - 1] == '"')
l--;
(void)VSB_bcat(vsb, sc.p[i], l);
}
else if (esc == VENUM(minimal)) {
for (i = 0; i < sc.n; i++)
if (! vsbjminimal(vsb, sc.p[i]))
break;
}
else if (esc == VENUM(ascii)) {
e = NULL;
for (i = 0; i < sc.n; i++)
if (! vsbjascii(vsb, sc.p[i], &e))
break;
if (e != NULL) {
VRT_fail(ctx,
"j.string(x, ascii) bad UTF-8 at: ...%.10s", e);
WS_Release(ctx->ws, 0);
return (NULL);
}
}
else
WRONG("esc enum");
(void)VSB_putc(vsb, '"');
sl = VSB_len(vsb);
p = WS_VSB_finish(vsb, ctx->ws, NULL); p = WS_VSB_finish(vsb, ctx->ws, NULL);
if (p == NULL) { if (p == NULL) {
VRT_fail(ctx, "j.string(): our of workspace"); VRT_fail(ctx, "j.string(): out of workspace");
return (NULL); return (NULL);
} }
e = NULL;
AZ(is_jquot(p + 2, &e));
// validation succeeds if error points to the final "
if (e == p + sl - 1)
assert(*e == '"');
else if (esc == VENUM(none)) {
VRT_fail(ctx,
"j.string(x, none) malformed string at: ...%.10s", e);
return (NULL);
}
else
WRONG("is_jquot on our own output failed");
AZ((uintptr_t)p & 1); AZ((uintptr_t)p & 1);
return (p + 1); return (p + 1);
} }
...@@ -303,7 +378,7 @@ vsbjvalue(struct vsb *vsb, const char *p) ...@@ -303,7 +378,7 @@ vsbjvalue(struct vsb *vsb, const char *p)
if (! (is_J(p) || is_jnumber(p) || is_jlit(p))) if (! (is_J(p) || is_jnumber(p) || is_jlit(p)))
vsbjstring(vsb, p); vsbjstring(vsb, p);
else else
AZ(VSB_bcat(vsb, p, (ssize_t)strlen(p))); (void) VSB_bcat(vsb, p, (ssize_t)strlen(p));
} }
VCL_STRING VCL_STRING
...@@ -330,13 +405,13 @@ vmod_array(VRT_CTX, VCL_STRANDS s) ...@@ -330,13 +405,13 @@ vmod_array(VRT_CTX, VCL_STRANDS s)
VSB_mcat(vsb, jm_array); // [ VSB_mcat(vsb, jm_array); // [
vsbjvalue(vsb, s->p[0]); vsbjvalue(vsb, s->p[0]);
for (i = 1; i < s->n; i++) { for (i = 1; i < s->n; i++) {
AZ(VSB_putc(vsb, ',')); (void)VSB_putc(vsb, ',');
vsbjvalue(vsb, s->p[i]); vsbjvalue(vsb, s->p[i]);
} }
AZ(VSB_putc(vsb, ']')); (void)VSB_putc(vsb, ']');
p = WS_VSB_finish(vsb, ctx->ws, NULL); p = WS_VSB_finish(vsb, ctx->ws, NULL);
if (p == NULL) { if (p == NULL) {
VRT_fail(ctx, "j.array(): our of workspace"); VRT_fail(ctx, "j.array(): out of workspace");
return (NULL); return (NULL);
} }
AZ((uintptr_t)p & 1); AZ((uintptr_t)p & 1);
...@@ -352,14 +427,18 @@ static int ...@@ -352,14 +427,18 @@ static int
vsbjkey(VRT_CTX, struct vsb *vsb, const char *p) vsbjkey(VRT_CTX, struct vsb *vsb, const char *p)
{ {
if (is_Jstring(p)) if (is_Jstring(p))
AZ(VSB_bcat(vsb, p, (ssize_t)strlen(p))); (void) VSB_bcat(vsb, p, (ssize_t)strlen(p));
else if (is_J(p) || is_jnumber(p) || is_jlit(p)) { else if (is_J(p) || is_jnumber(p) || is_jlit(p)) {
VRT_fail(ctx, "keys must be strings, got %s", p); VRT_fail(ctx, "keys must be strings, got %s", p);
return (0); return (0);
} }
else else
vsbjstring(vsb, p); vsbjstring(vsb, p);
return (1); if (! VSB_error(vsb))
return (1);
VRT_fail(ctx, "Out of workspace while formatting json key");
return (0);
} }
// add key: value // add key: value
...@@ -368,7 +447,7 @@ vsbjkv(VRT_CTX, struct vsb *vsb, const char *k, const char *v) ...@@ -368,7 +447,7 @@ vsbjkv(VRT_CTX, struct vsb *vsb, const char *k, const char *v)
{ {
if (! vsbjkey(ctx, vsb, k)) if (! vsbjkey(ctx, vsb, k))
return (0); return (0);
AZ(VSB_putc(vsb, ':')); (void)VSB_putc(vsb, ':');
vsbjvalue(vsb, v); vsbjvalue(vsb, v);
return (1); return (1);
} }
...@@ -408,19 +487,55 @@ vmod_object(VRT_CTX, VCL_STRANDS s) ...@@ -408,19 +487,55 @@ vmod_object(VRT_CTX, VCL_STRANDS s)
} }
for (i = 2; i < s->n; i += 2) { for (i = 2; i < s->n; i += 2) {
assert((i & 1) == 0); assert((i & 1) == 0);
AZ(VSB_putc(vsb, ',')); (void)VSB_putc(vsb, ',');
//lint -e{679} trunc //lint -e{679} trunc
if (vsbjkv(ctx, vsb, s->p[i], s->p[i + 1])) if (vsbjkv(ctx, vsb, s->p[i], s->p[i + 1]))
continue; continue;
WS_Release(ctx->ws, 0); WS_Release(ctx->ws, 0);
return (NULL); return (NULL);
} }
AZ(VSB_putc(vsb, '}')); (void)VSB_putc(vsb, '}');
p = WS_VSB_finish(vsb, ctx->ws, NULL); p = WS_VSB_finish(vsb, ctx->ws, NULL);
if (p == NULL) { if (p == NULL) {
VRT_fail(ctx, "j.object(): our of workspace"); VRT_fail(ctx, "j.object(): out of workspace");
return (NULL); return (NULL);
} }
AZ((uintptr_t)p & 1); AZ((uintptr_t)p & 1);
return (p + 1); return (p + 1);
} }
/*
 * j.unquote(): decode a double-quoted JSON string.
 *
 * The argument must be at least two bytes long and both start and end
 * with a double quote; anything else (including NULL) is a VCL failure.
 * The text after the opening quote is handed to vsbjunquot(), which
 * presumably decodes JSON escapes up to and including the closing quote
 * (defined elsewhere, confirm there). The trailing quote is then
 * stripped from the finished workspace string.
 *
 * Returns the decoded string, or NULL after calling VRT_fail().
 */
VCL_STRING
vmod_unquote(VRT_CTX, VCL_STRING p)
{
	struct vsb vsb[1];
	const char *err = NULL;
	size_t l;
	char *r;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);

	/*
	 * Require two distinct quote characters: a lone '"' must not count
	 * as both the opening and the closing quote, and a NULL string must
	 * not be dereferenced.
	 */
	l = (p == NULL) ? 0 : strlen(p);
	if (l < 2 || p[0] != '"' || p[l - 1] != '"') {
		VRT_fail(ctx, "j.unquote() argument missing quotes");
		return (NULL);
	}
	p++;

	WS_VSB_new(vsb, ctx->ws);
	if (vsbjunquot(vsb, p, &err) && err != NULL) {
		VRT_fail(ctx, "j.unquote() error at: ...%.10s", err);
		/*
		 * Give the workspace back before bailing out, as the
		 * error paths of j.string() and j.object() do.
		 */
		WS_Release(ctx->ws, 0);
		return (NULL);
	}
	r = WS_VSB_finish(vsb, ctx->ws, NULL);
	if (r == NULL) {
		VRT_fail(ctx, "j.unquote(): out of workspace");
		return (NULL);
	}

	/* drop the closing quote copied along with the payload */
	l = strlen(r);
	if (l == 0)
		return (r);
	l--;
	assert(r[l] == '"');
	r[l] = '\0';
	return (r);
}
...@@ -33,12 +33,14 @@ SYNOPSIS ...@@ -33,12 +33,14 @@ SYNOPSIS
STRING number([INT integer], [REAL real], [BYTES bytes], [DURATION duration], [TIME time], [STRING string]) STRING number([INT integer], [REAL real], [BYTES bytes], [DURATION duration], [TIME time], [STRING string])
STRING string(STRING) STRING string(STRING, ENUM escape)
STRING array(STRING) STRING array(STRING)
STRING object(STRING) STRING object(STRING)
STRING unquote(STRING)
INTRODUCTION INTRODUCTION
============ ============
...@@ -48,6 +50,8 @@ THE** `WARNING`_. ...@@ -48,6 +50,8 @@ THE** `WARNING`_.
.. _JSON: https://www.json.org/json-en.html .. _JSON: https://www.json.org/json-en.html
.. _RFC 8259: https://www.rfc-editor.org/rfc/rfc8259
Formatting `JSON`_ in pure VCL is a PITA, because string processing in Formatting `JSON`_ in pure VCL is a PITA, because string processing in
VCL was never made for it. VCL being a Domain Specific Language, it VCL was never made for it. VCL being a Domain Specific Language, it
was made for processing HTTP headers. was made for processing HTTP headers.
...@@ -295,11 +299,48 @@ Deprecated alias for ``number()``. ...@@ -295,11 +299,48 @@ Deprecated alias for ``number()``.
.. _j.string(): .. _j.string():
STRING string(STRING) STRING string(STRING, ENUM escape)
--------------------- ----------------------------------
::
STRING string(STRING, ENUM {none, minimal, ascii} escape=minimal)
Return the argument as a JSON string with characters escaped according
to the *escape* argument:
* ``none``:
The string is expected to already be JSON-encoded.
It can optionally have left and/or right quotes; any missing quote is
added.
Passing a string which is incorrectly escaped for JSON triggers a VCL
failure. UTF-8 encoding is not checked.
* ``minimal``
Return the argument as a JSON string with quotes added and special The string is minimally escaped for JSON according to `RFC 8259`_:
characters escaped appropriately.
quotation mark, reverse solidus, and the control characters (U+0000
through U+001F)
are escaped.
All other characters are assumed to be UTF-8 encoded and left
unchanged.
* ``ascii``
The string is escaped such that only 7-bit ASCII characters are left:
In addition to ``minimal`` escaping, all UTF-8 characters are
decoded and escaped as one or two ``\u``\ *HHHH* sequences.
UTF-8 decoding errors trigger a VCL failure.
This function never escapes the solidus ``/``.
ALIAS str() ALIAS str()
----------- -----------
...@@ -361,6 +402,14 @@ ALIAS obj() ...@@ -361,6 +402,14 @@ ALIAS obj()
Deprecated alias for ``object()``. Deprecated alias for ``object()``.
.. _j.unquote():
STRING unquote(STRING)
----------------------
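Utility function to decode a double-quoted JSON string into UTF-8.
The argument must begin and end with a quotation mark; missing quotes
or a decoding error trigger a VCL failure.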
WARNING WARNING
======= =======
......
...@@ -36,6 +36,8 @@ THE** `WARNING`_. ...@@ -36,6 +36,8 @@ THE** `WARNING`_.
.. _JSON: https://www.json.org/json-en.html .. _JSON: https://www.json.org/json-en.html
.. _RFC 8259: https://www.rfc-editor.org/rfc/rfc8259
Formatting `JSON`_ in pure VCL is a PITA, because string processing in Formatting `JSON`_ in pure VCL is a PITA, because string processing in
VCL was never made for it. VCL being a Domain Specific Language, it VCL was never made for it. VCL being a Domain Specific Language, it
was made for processing HTTP headers. was made for processing HTTP headers.
...@@ -253,10 +255,44 @@ a VCL failure. ...@@ -253,10 +255,44 @@ a VCL failure.
$Alias num number $Alias num number
$Function STRING string(STRANDS) $Function STRING string(STRANDS,
ENUM { none, minimal, ascii } escape="minimal")
Return the argument as a JSON string with characters escaped according
to the *escape* argument:
* ``none``:
The string is expected to already be JSON-encoded.
It can optionally have left and/or right quotes; any missing quote is
added.
Passing a string which is incorrectly escaped for JSON triggers a VCL
failure. UTF-8 encoding is not checked.
* ``minimal``
The string is minimally escaped for JSON according to `RFC 8259`_:
quotation mark, reverse solidus, and the control characters (U+0000
through U+001F)
Return the argument as a JSON string with quotes added and special are escaped.
characters escaped appropriately.
All other characters are assumed to be UTF-8 encoded and left
unchanged.
* ``ascii``
The string is escaped such that only 7-bit ASCII characters are left:
In addition to ``minimal`` escaping, all UTF-8 characters are
decoded and escaped as one or two ``\u``\ *HHHH* sequences.
UTF-8 decoding errors trigger a VCL failure.
This function never escapes the solidus ``/``.
$Alias str string $Alias str string
...@@ -303,6 +339,11 @@ concatenation argument (strands) as key/value pairs. ...@@ -303,6 +339,11 @@ concatenation argument (strands) as key/value pairs.
$Alias obj object $Alias obj object
$Function STRING unquote(STRING)
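Utility function to decode a double-quoted JSON string into UTF-8.
The argument must begin and end with a quotation mark; missing quotes
or a decoding error trigger a VCL failure.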
WARNING WARNING
======= =======
......
varnishtest "test vmod-j" varnishtest "test vmod-j"
varnish v1 -vcl { varnish v1 -vcl {
import blob;
import std; import std;
import vtc;
import j; import j;
backend proforma none; backend proforma none;
sub vcl_recv { sub vcl_recv {
if (req.url ~ "^/e") {
return (synth(400));
}
return (synth(200)); return (synth(200));
} }
sub vcl_synth { sub vcl_synth {
if (resp.status == 200) {
call synth_ok;
}
if (resp.status == 400) {
call synth_err;
}
set resp.status = 412;
return (deliver);
}
sub synth_err {
if (req.url == "/e-badutf") {
set resp.http.bad = j.string(
blob.transcode(HEX, encoded="c080"), ascii);
}
else if (req.url == "/e-ws") {
vtc.workspace_alloc(client, -10);
set resp.http.ws = j.string(
resp.http.date + resp.http.date);
}
else if (req.url == "/e-mal") {
set resp.http.mal = j.string({"""abc"}, none);
}
return (deliver);
}
sub synth_ok {
set resp.http.barf = {"{"key":""} + resp.reason + {"""} + "}"; set resp.http.barf = {"{"key":""} + resp.reason + {"""} + "}";
set resp.http.easy = j.obj("key" + resp.reason); set resp.http.easy = j.obj("key" + resp.reason);
...@@ -36,6 +68,15 @@ varnish v1 -vcl { ...@@ -36,6 +68,15 @@ varnish v1 -vcl {
####### #######
# string # string
set resp.http.st = j.string(true); set resp.http.st = j.string(true);
set resp.http.snon = j.string("\t" + req.http.foo +
"\r\u0001", none);
set resp.http.snoq = j.string({""\t\r\u0001""}, none);
set resp.http.snoqq = j.string(req.http.foo + req.http.foo +
{"""} + req.http.foo + "\t\r\u0001", none);
set resp.http.smin = j.string("""
🐰€öäü""");
set resp.http.sasc = j.string("""
🐰€öäü""", ascii);
####### #######
# array # array
...@@ -96,6 +137,11 @@ client c1 { ...@@ -96,6 +137,11 @@ client c1 {
expect resp.http.ns == "-42.42e+42" expect resp.http.ns == "-42.42e+42"
expect resp.http.st == "\"true\"" expect resp.http.st == "\"true\""
expect resp.http.snon == "\"\\t\\r\\u0001\""
expect resp.http.snoq == "\"\\t\\r\\u0001\""
expect resp.http.snoqq == "\"\\t\\r\\u0001\""
expect resp.http.smin == "\"\\t\\n🐰€öäü\""
expect resp.http.sasc == "\"\\t\\n\\ud83d\\udc30\\u20ac\\u00f6\\u00e4\\u00fc\""
expect resp.http.a0 == "[]" expect resp.http.a0 == "[]"
expect resp.http.a1 == "[1]" expect resp.http.a1 == "[1]"
...@@ -110,4 +156,42 @@ client c1 { ...@@ -110,4 +156,42 @@ client c1 {
expect resp.http.o3 == \ expect resp.http.o3 == \
"{\"A\":null,\"B\":{\"BB\":42.42e42,\"CC\":false,\"DD\":true},\"C\":[\"A\",2,{}]}" "{\"A\":null,\"B\":{\"BB\":42.42e42,\"CC\":false,\"DD\":true},\"C\":[\"A\",2,{}]}"
expect resp.http.o4 == "{\"A\":{}}" expect resp.http.o4 == "{\"A\":{}}"
} -run } -start
logexpect l2 -v v1 -q "ReqURL ~ \"^/e-badutf\"" {
expect * * VCL_Error "bad UTF-8 at:"
} -start
client c2 {
txreq -url "/e-badutf"
rxresp
expect resp.status == 500
} -start
logexpect l3 -v v1 -q "ReqURL ~ \"^/e-ws\"" {
expect * * VCL_Error "j.string..: out of work"
} -start
client c3 {
txreq -url "/e-ws"
rxresp
expect resp.status == 500
} -start
logexpect l4 -v v1 -q "ReqURL ~ \"^/e-mal\"" {
expect * * VCL_Error "j.string.*malformed"
} -start
client c4 {
txreq -url "/e-mal"
rxresp
expect resp.status == 500
} -start
client c1 -wait
client c2 -wait
logexpect l2 -wait
client c3 -wait
logexpect l3 -wait
client c4 -wait
logexpect l4 -wait
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment