Commit 5c03d65b authored by Poul-Henning Kamp

Add a feature 'esi_remove_bom' which will make ESI ignore and remove

UTF-8 BOMs at the start of an ESI-object.

Notice that the removal only happens if the file is actually ESI
processed on delivery, so to get BOM removal for non-XML files,
you may have to disable the XML test and insert a dummy ESI directive
such as <esi:remove>BOMs Be Gone!</esi:remove> or similar.

Fixes   #1355
parent 66109744
......@@ -121,6 +121,7 @@ struct vep_state {
/*---------------------------------------------------------------------*/
/*
 * Parser state identifiers.  VEP_Parse() compares vep->state against
 * these pointers directly (e.g. vep->state == VEP_START) and the match
 * tables store their addresses, so a state is identified by the pointer
 * itself; the bracketed text is presumably only a human-readable label
 * -- TODO confirm.
 */
static const char * const VEP_START = "[Start]";
static const char * const VEP_BOM = "[BOM]";
static const char * const VEP_TESTXML = "[TestXml]";
static const char * const VEP_NOTXML = "[NotXml]";
......@@ -175,6 +176,13 @@ static struct vep_match vep_match_attr_include[] = {
{ NULL, &VEP_SKIPATTR }
};
/*---------------------------------------------------------------------
 * Match table installed by VEP_Parse() from the VEP_START state when the
 * esi_remove_bom feature is enabled and the next input byte is '\xeb'.
 *
 * The first entry names the three-byte sequence to recognise and sends
 * the parser back to VEP_START, which presumably is what lets several
 * consecutive BOMs be stripped (the test case for /3 relies on that).
 * The terminating NULL entry presumably is the fallback taken when the
 * input does not match; it continues in VEP_BOM -- TODO confirm against
 * the VEP_MATCH implementation.
 *
 * NOTE(review): the UTF-8 encoding of U+FEFF is EF BB BF, whereas this
 * table (and the '\xeb' first-byte checks in VEP_Parse()) use EB BB BF.
 * If that is a typo, the table, both first-byte checks, the test bodies
 * and the feature help text must be changed together.
 */
static struct vep_match vep_match_bom[] = {
{ "\xeb\xbb\xbf", &VEP_START },
{ NULL, &VEP_BOM }
};
/*--------------------------------------------------------------------
* Report a parsing error
*/
......@@ -594,6 +602,13 @@ VEP_Parse(const struct busyobj *bo, const char *p, size_t l)
*/
if (vep->state == VEP_START) {
if (FEATURE(FEATURE_ESI_REMOVE_BOM) && *p == '\xeb') {
vep->match = vep_match_bom;
vep->state = VEP_MATCH;
} else
vep->state = VEP_BOM;
} else if (vep->state == VEP_BOM) {
vep_mark_skip(vep, p);
if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK))
vep->state = VEP_NEXTTAG;
else
......@@ -609,6 +624,12 @@ VEP_Parse(const struct busyobj *bo, const char *p, size_t l)
if (p < e && *p == '<') {
p++;
vep->state = VEP_STARTTAG;
} else if (p < e && *p == '\xeb') {
VSLb(vep->bo->vsl, SLT_ESI_xmlerror,
"No ESI processing, first char not '<'"
" (BOM? see feature esi_remove_bom)"
);
vep->state = VEP_NOTXML;
} else if (p < e) {
VSLb(vep->bo->vsl, SLT_ESI_xmlerror,
"No ESI processing, first char not '<'");
......
varnishtest "Test ESI ignoring BOMs"
# Exercises the esi_remove_bom feature bit.  Bodies carrying a BOM-like
# prefix are fetched before and after the feature is switched on with
# param.set, and the delivered body length shows whether the prefix and
# the <esi:remove> element were stripped.
#
# NOTE(review): the bodies use the bytes EB BB BF, matching what the
# parser looks for; the standard UTF-8 BOM is EF BB BF -- confirm which
# is intended before changing either side.

# Backend: one response per test URL.
#   /1, /2  single three-byte prefix
#   /3      the prefix twice in a row
#   /4      a two-byte sequence that only shares the first byte
server s1 {
rxreq
expect req.url == /1
txresp -body "\xeb\xbb\xbf <html> <esi:remove>blabla</esi:remove>"
rxreq
expect req.url == /2
txresp -body "\xeb\xbb\xbf <html> <esi:remove>blabla</esi:remove>"
rxreq
expect req.url == /3
txresp -body "\xeb\xbb\xbf\xeb\xbb\xbf <html> <esi:remove>blabla</esi:remove>"
rxreq
expect req.url == /4
txresp -body "\xeb\xbc <html> <esi:remove>blabla</esi:remove>"
} -start
# Request ESI processing for every backend response.
varnish v1 -vcl+backend {
sub vcl_backend_response {
set beresp.do_esi = true;
}
} -start
# Before the feature is enabled: the body is delivered at full length.
client c1 {
# No ESI processing
txreq -url /1
rxresp
expect resp.bodylen == 47
} -run
# Enable BOM removal for the remaining requests.
varnish v1 -cliok "param.set feature +esi_remove_bom"
# Same body as /1, now expected to come back shortened.
client c1 {
# BOM removed, ESI processing
txreq -url /2
rxresp
expect resp.bodylen == 13
} -run
# Two prefixes in a row are expected to yield the same length as one.
client c1 {
# BOMs removed, ESI processing
txreq -url /3
rxresp
expect resp.bodylen == 13
} -run
# A partial/incorrect sequence must not be treated as a BOM.
client c1 {
# Not a BOM, no ESI processing
txreq -url /4
rxresp
expect resp.bodylen == 46
} -run
......@@ -53,3 +53,8 @@ FEATURE_BIT(ESI_IGNORE_OTHER_ELEMENTS, esi_ignore_other_elements, "",
"Ignore non-esi XML-elements",
"Allows syntax errors in the XML"
)
/*
 * esi_remove_bom: ignore and remove a leading BOM on objects that are
 * ESI processed.
 *
 * The long description is written as adjacent string literals, which the
 * compiler joins with no separator, so the first piece carries its own
 * trailing space to keep the two sentences apart.  The byte values quoted
 * in the text are the ones the ESI parser currently matches.
 */
FEATURE_BIT(ESI_REMOVE_BOM, esi_remove_bom, "",
"Remove UTF-8 BOM",
"Remove UTF-8 BOM from front of object. "
"Ignore and remove the UTF-8 BOM (0xeb 0xbb 0xbf) from front of object."
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment