Commit 60209e72 authored by Geoff Simmons

implement the compile option locale

parent 9bd84fe2
......@@ -50,7 +50,7 @@ regex
::
new OBJ = regex(STRING pattern, BOOL allow_empty_class=0, BOOL anchored=0, ENUM {ANYCRLF,UNICODE} bsr=0, BOOL alt_bsux=0, BOOL alt_circumflex=0, BOOL alt_verbnames=0, BOOL caseless=0, BOOL dotall=0, BOOL dollar_endonly=0, BOOL dupnames=0, BOOL extended=0, BOOL firstline=0, STRING locale=0, BOOL match_unset_backref=0, INT max_pattern_len=0, BOOL multiline=0, BOOL never_backslash_c=0, BOOL never_ucp=0, BOOL never_utf=0, ENUM {CR,LF,CRLF,ANYCRLF,ANY} newline=0, BOOL no_auto_capture=0, BOOL no_auto_possess=0, BOOL no_dotstar_anchor=0, BOOL no_start_optimize=0, BOOL no_utf_check=0, INT parens_nest_limit=0, BOOL ucp=0, BOOL ungreedy=0, BOOL use_offset_limit=0, BOOL utf=0)
new OBJ = regex(STRING pattern, BOOL allow_empty_class=0, BOOL anchored=0, ENUM {ANYCRLF,UNICODE} bsr=0, BOOL alt_bsux=0, BOOL alt_circumflex=0, BOOL alt_verbnames=0, BOOL caseless=0, BOOL dollar_endonly=0, BOOL dotall=0, BOOL dupnames=0, BOOL extended=0, BOOL firstline=0, STRING locale=0, BOOL match_unset_backref=0, INT max_pattern_len=0, BOOL multiline=0, BOOL never_backslash_c=0, BOOL never_ucp=0, BOOL never_utf=0, ENUM {CR,LF,CRLF,ANYCRLF,ANY} newline=0, BOOL no_auto_capture=0, BOOL no_auto_possess=0, BOOL no_dotstar_anchor=0, BOOL no_start_optimize=0, BOOL no_utf_check=0, INT parens_nest_limit=0, BOOL ucp=0, BOOL ungreedy=0, BOOL use_offset_limit=0, BOOL utf=0)
# XXX options for dfa_match, jit fast path, start_offset
# XXX option to make saving the match ctx with PRIV_CALL optional
......
......@@ -93,6 +93,12 @@ PKG_CHECK_MODULES([PCRE2], [libpcre2-8])
AC_SUBST([PCRE2_CFLAGS])
AC_SUBST([PCRE2_LIBS])
# Probe for the fr_FR locale and select which variant of the
# locale_fr test gets substituted as LOCALE_FR_VTC. The decision is
# taken from the exit status of the pipeline instead of expanding its
# output unquoted into a test expression, and stderr is discarded so
# that a missing locale(1) simply selects the ".no" variant.
if locale -a 2>/dev/null | grep '^fr_FR$' >/dev/null 2>&1; then
    AC_SUBST([LOCALE_FR_VTC], [locale_fr.vtc.yes])
else
    AC_SUBST([LOCALE_FR_VTC], [locale_fr.vtc.no])
fi
# --enable-stack-protector
AC_ARG_ENABLE(stack-protector,
AS_HELP_STRING([--enable-stack-protector],[enable stack protector (default is YES)]),
......
......@@ -21,14 +21,18 @@ vcc_if.c: vcc_if.h
vcc_if.h vmod_pcre2.man.rst: @VMODTOOL@ $(top_srcdir)/src/vmod_pcre2.vcc
@VMODTOOL@ $(top_srcdir)/src/vmod_pcre2.vcc
VMOD_TESTS = $(top_srcdir)/src/tests/*.vtc
VMOD_TESTS = $(top_srcdir)/src/tests/*.vtc $(top_srcdir)/src/tests/locale_fr.vtc
.PHONY: $(VMOD_TESTS)
$(top_srcdir)/src/tests/*.vtc: libvmod_pcre2.la
locale_fr:
cp $(top_srcdir)/src/tests/@LOCALE_FR_VTC@ $(top_srcdir)/src/tests/locale_fr.vtc
$(top_srcdir)/src/tests/*.vtc $(top_srcdir)/src/tests/locale_fr.vtc: locale_fr libvmod_pcre2.la
PATH=@LIBVARNISHAPI_SBINDIR@:$$PATH \
@VARNISHTEST@ -Dvmod_topbuild=$(abs_top_builddir) $@
check: $(VMOD_TESTS)
rm $(top_srcdir)/src/tests/locale_fr.vtc
EXTRA_DIST = \
vmod_pcre2.vcc \
......@@ -38,4 +42,5 @@ CLEANFILES = \
$(builddir)/vcc_if.c \
$(builddir)/vcc_if.h \
$(builddir)/vmod_pcre2.rst \
$(builddir)/vmod_pcre2.man.rst
$(builddir)/vmod_pcre2.man.rst \
$(top_srcdir)/src/tests/locale_fr.vtc
......@@ -462,3 +462,62 @@ client c1 {
expect resp.http.r3-1 == "true"
expect resp.http.r3-2 == "false"
} -run
# Test the locale option with the POSIX locale, which is the only
# locale we can presume exists (at least on POSIX-conforming
# systems). AFAICT the results are no different at all from the pcre2
# standard character tables, but at least this tests against obvious
# bugs.
# Load a VCL that builds six regexes with locale="POSIX" and reports the
# result of one match per regex in a response header.
varnish v1 -vcl {
import pcre2 from "${vmod_topbuild}/src/.libs/libvmod_pcre2.so";
backend b { .host = "${bad_ip}"; }
sub vcl_init {
# r1: inline caseless flag plus the \d \D \w \W \s \S escapes and the
# \b / \B assertions.
new r1 = pcre2.regex("(?i)abc \d \D \w \W \s \S \babc\B",
locale="POSIX");
# r2-r6: the POSIX bracket classes, a few per pattern.
new r2 = pcre2.regex("[[:alnum:]]+ [[:alpha:]]+ [[:ascii:]]+",
locale="POSIX");
new r3 = pcre2.regex("[[:blank:]]+_[[:cntrl:]]+ [[:digit:]]+",
locale="POSIX");
new r4 = pcre2.regex("[[:graph:]]+ [[:lower:]]+ [[:print:]]+",
locale="POSIX");
new r5 = pcre2.regex("[[:space:]]+_[[:upper:]]+ [[:word:]]+",
locale="POSIX");
new r6 = pcre2.regex("[[:xdigit:]]+", locale="POSIX");
}
sub vcl_recv {
# Every request is answered with a synthetic 200 response.
return(synth(200));
}
sub vcl_synth {
# One header per regex carries the result of its match call.
set resp.http.r1 = r1.match("aBc 0 x x ! x AbCx");
set resp.http.r2 = r2.match("a1 bb ~~");
set resp.http.r3 = r3.match({" _ 123456789"});
set resp.http.r4 = r4.match({"ab12!"§$%&/ xyz !"§$%&/"});
set resp.http.r5 = r5.match({" _ABCXYZ _abcXYZ987"});
set resp.http.r6 = r6.match("0123456789abcdef");
}
}
# Send one request and require that all six regexes reported a match.
client c1 {
txreq
rxresp
expect resp.status == "200"
expect resp.http.r1 == "true"
expect resp.http.r2 == "true"
expect resp.http.r3 == "true"
expect resp.http.r4 == "true"
expect resp.http.r5 == "true"
expect resp.http.r6 == "true"
} -run
# Pretty sure that this locale will not exist anywhere
# Loading this VCL is expected to fail with the quoted error message,
# which names both the rejected locale and the object ("r").
varnish v1 -errvcl {vmod pcre2 error: Cannot set locale lPb2PZDb4KcfbKJPSww1cm6omvoF in r constructor} {
import pcre2 from "${vmod_topbuild}/src/.libs/libvmod_pcre2.so";
backend b { .host = "${bad_ip}"; }
sub vcl_init {
# The locale name is an arbitrary string; the pattern itself is empty.
new r = pcre2.regex("", locale="lPb2PZDb4KcfbKJPSww1cm6omvoF");
}
}
varnishtest "not testing compile option locale=fr_FR"
# If this test is run as locale_fr.vtc, then compile option
# locale="fr_FR" is not being tested for this build, because autoconf
# could not verify the existence of that locale on this host.
#
# Check the command 'locale -a' if you think this is an error -- you
# may be able to generate the locale on your platform and try again.
#
# Simpler tests for the locale option are run in compile_opt.vtc.
#
# If the locale had been found, this test would have executed more
# detailed tests for option locale, following the self-tests for
# libpcre2.
# -*-mode: vcl; coding: iso-8859-1 -*-
# The non-ASCII letters in this source correspond to ISO-8859-1 (aka
# ISO-Latin-1). You might want to set up your editor accordingly.
varnishtest "compile option locale using a non-standard locale"
# tests from pcre2 testoutput3
# XXX add the tests that require backrefs and pcre2_pattern_info
# Build pairs of regexes -- one compiled with locale="fr_FR", one
# without -- and report the result of each match in a response header.
#
# NOTE(review): the subject strings below look damaged. The file header
# declares ISO-8859-1 content, yet every subject shown here is plain
# ASCII "cole", and r5/r6 are first matched against "". As written,
# several expectations cannot hold -- e.g. r12 ("cole", caseless)
# matched against "cole" is expected to be "false". Presumably the
# subjects (and the r11/r12 patterns) originally contained non-ASCII
# letters, and a control character for r5/r6, that were lost; confirm
# against the original file before relying on this test.
varnish v1 -vcl {
import pcre2 from "${vmod_topbuild}/src/.libs/libvmod_pcre2.so";
backend b { .host = "${bad_ip}"; }
sub vcl_init {
# Odd-numbered objects r1-r11 use the fr_FR locale; the even-numbered
# object that follows each one is the same pattern without it.
new r1 = pcre2.regex("^[\w]+", locale="fr_FR");
new r2 = pcre2.regex("^[\w]+");
new r3 = pcre2.regex("^[\W]+", locale="fr_FR");
new r4 = pcre2.regex("^[\W]+");
new r5 = pcre2.regex("[\b]", locale="fr_FR");
new r6 = pcre2.regex("[\b]");
new r7 = pcre2.regex("^\w+", locale="fr_FR");
new r8 = pcre2.regex("^\w+");
new r9 = pcre2.regex("(.+)\b(.+)", locale="fr_FR");
new r10 = pcre2.regex("(.+)\b(.+)");
new r11 = pcre2.regex("cole", locale="fr_FR", caseless=true);
new r12 = pcre2.regex("cole", caseless=true);
# r13/r14 both use fr_FR and differ only in the caseless option.
new r13 = pcre2.regex("^[\xc8-\xc9]", locale="fr_FR",
caseless=true);
new r14 = pcre2.regex("^[\xc8-\xc9]", locale="fr_FR");
}
sub vcl_recv {
# Every request is answered with a synthetic 200 response.
return(synth(200));
}
sub vcl_synth {
# Headers with a -1 / -2 suffix hold two separate match calls on the
# same regex object.
set resp.http.r1 = r1.match("cole");
set resp.http.r2 = r2.match("cole");
set resp.http.r3 = r3.match("cole");
set resp.http.r4 = r4.match("cole");
set resp.http.r5-1 = r5.match("");
set resp.http.r6-1 = r6.match("");
set resp.http.r5-2 = r5.match("a");
set resp.http.r6-2 = r6.match("a");
set resp.http.r7 = r7.match("cole");
set resp.http.r8 = r8.match("cole");
set resp.http.r9 = r9.match("cole");
set resp.http.r10 = r10.match("cole");
set resp.http.r11-1 = r11.match("cole");
set resp.http.r12-1 = r12.match("cole");
set resp.http.r11-2 = r11.match("cole");
set resp.http.r12-2 = r12.match("cole");
set resp.http.r13-1 = r13.match("cole");
set resp.http.r13-2 = r13.match("cole");
set resp.http.r14-1 = r14.match("cole");
set resp.http.r14-2 = r14.match("cole");
}
} -start
# Send one request and check the per-regex results; the expected values
# differ between the fr_FR object and its locale-less counterpart.
client c1 {
txreq
rxresp
expect resp.status == "200"
expect resp.http.r1 == "true"
expect resp.http.r2 == "false"
expect resp.http.r3 == "false"
expect resp.http.r4 == "true"
expect resp.http.r5-1 == "true"
expect resp.http.r5-2 == "false"
expect resp.http.r6-1 == "true"
expect resp.http.r6-2 == "false"
expect resp.http.r7 == "true"
expect resp.http.r8 == "false"
expect resp.http.r9 == "false"
expect resp.http.r10 == "true"
expect resp.http.r11-1 == "true"
expect resp.http.r11-2 == "true"
expect resp.http.r12-1 == "true"
expect resp.http.r12-2 == "false"
expect resp.http.r13-1 == "true"
expect resp.http.r13-2 == "true"
expect resp.http.r14-1 == "true"
expect resp.http.r14-2 == "false"
} -run
......@@ -29,6 +29,7 @@
#include <stdint.h>
#include <string.h>
#include <locale.h>
#include "vcl.h"
#include "cache/cache.h"
......@@ -248,9 +249,25 @@ vmod_regex__init(VRT_CTX, struct vmod_pcre2_regex **regexp,
AZ(pcre2_set_bsr(ccontext, val));
}
if (locale != NULL) {
VERR(ctx, "locales not implemented yet in %s constructor",
vcl_name);
return;
const unsigned char *tables;
char *saved_lc = setlocale(LC_CTYPE, NULL);
AN(saved_lc);
if (setlocale(LC_CTYPE, locale) == NULL) {
AN(setlocale(LC_CTYPE, saved_lc));
VERR(ctx, "Cannot set locale %s in %s constructor",
locale, vcl_name);
return;
}
tables = pcre2_maketables(NULL);
if (tables == NULL) {
AN(setlocale(LC_CTYPE, saved_lc));
VERRNOMEM(ctx, "Creating tables for locale %s in "
"%s constructor", locale, vcl_name);
return;
}
AZ(pcre2_set_character_tables(ccontext, tables));
AN(setlocale(LC_CTYPE, saved_lc));
}
if (max_pattern_len != 0)
AZ(pcre2_set_max_pattern_length(ccontext,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment