Commit 25beb0c3 authored by Geoff Simmons

Move some of the code out into a separate source and header.

This mostly moves out functions that are not on the fast path and that
we don't try to inline. It also clears up a lot of clutter.
parent a403707a
Pipeline #261 skipped
......@@ -6,7 +6,9 @@ vmod_LTLIBRARIES = libvmod_pcre2.la
libvmod_pcre2_la_LDFLAGS = -module -export-dynamic -avoid-version -shared
libvmod_pcre2_la_SOURCES = \
vmod_pcre2.c
vmod_pcre2.h \
vmod_pcre2.c \
pcre2.c
nodist_libvmod_pcre2_la_SOURCES = \
vcc_if.c \
......@@ -14,7 +16,9 @@ nodist_libvmod_pcre2_la_SOURCES = \
libvmod_pcre2_la_LIBADD = @PCRE2_LIBS@
vmod_pcre2.lo: vcc_if.c vcc_if.h
vmod_pcre2.lo: vcc_if.c vcc_if.h vmod_pcre2.h
pcre2.lo: vmod_pcre2.h
vcc_if.c: vcc_if.h
......
/*-
* Copyright 2017 UPLEX - Nils Goroll Systemoptimierung
* All rights reserved.
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include "vmod_pcre2.h"
/* True when the VCL_MET_INIT bit is set in the method field of ctx. */
#define INIT(ctx) (((ctx)->method & VCL_MET_INIT) != 0)
/*
 * If the variable named by param is non-zero, pass it (cast to type) to
 * pcre2_set_<param>() for the pcre2 context ctx. param is used both as an
 * expression and as part of the setter's name, so it must be a plain
 * identifier. The call is wrapped in AZ() -- presumably Varnish's
 * assert-zero macro from vas.h, i.e. the setter is expected to return 0.
 */
#define SET_CTX_PARAM(ctx, param, type) do { \
if ((param) != 0) \
AZ(pcre2_set_##param((ctx), (type)(param))); \
} while(0)
/*
 * Log a printf-style error message under the SLT_VCL_Error tag. The
 * message goes to ctx->vsl when that is set, otherwise it is logged via
 * VSLv() with id 0.
 *
 * When ctx->method is VCL_MET_INIT, the message plus a newline is also
 * appended to ctx->msg (which must be non-NULL), and VRT_handling() is
 * invoked with VCL_RET_FAIL.
 */
void
errmsg(VRT_CTX, const char *fmt, ...)
{
	va_list ap;

	/* First pass over the arguments: the log entry. */
	va_start(ap, fmt);
	if (ctx->vsl == NULL)
		VSLv(SLT_VCL_Error, 0, fmt, ap);
	else
		VSLbv(ctx->vsl, SLT_VCL_Error, fmt, ap);
	va_end(ap);

	if (ctx->method != VCL_MET_INIT)
		return;

	/* Second pass: the message buffer of the context. */
	AN(ctx->msg);
	va_start(ap, fmt);
	VSB_vprintf(ctx->msg, fmt, ap);
	va_end(ap);
	VSB_putc(ctx->msg, '\n');
	VRT_handling(ctx, VCL_RET_FAIL);
}
/*
 * Report a libpcre2 error through VERR, using the text that
 * pcre2_get_error_message() supplies for errcode.
 *
 * msg is emitted before the pcre2 message and post after it. The message
 * text is written into space reserved from ctx->ws; the workspace is reset
 * to its state on entry before returning.
 *
 * If no workspace is left at all, or pcre2 returns an unexpected negative
 * value, the numeric error code is reported instead. A message that does
 * not fit is reported as truncated.
 */
void
report_pcre2_err(VRT_CTX, int errcode, const char * const restrict msg,
		 const char * const restrict post)
{
	int ret;
	uintptr_t snap = WS_Snapshot(ctx->ws);
	unsigned bytes = WS_Reserve(ctx->ws, 0);
	char *buf = WS_Front(ctx->ws);

	if (bytes == 0) {
		/*
		 * Nothing could be reserved: pcre2 would not write to buf,
		 * so it must not be read as a string.
		 */
		WS_Release(ctx->ws, 0);
		ERRNOMEM(ctx, "allocating workspace for pcre2 error "
			 "message");
		VERR(ctx, "%s: libpcre2 error code %d%s", msg, errcode,
		     post);
		WS_Reset(ctx->ws, snap);
		return;
	}

	ret = pcre2_get_error_message(errcode, (PCRE2_UCHAR *)buf,
				      (PCRE2_SIZE)bytes);
	if (ret == PCRE2_ERROR_BADDATA) {
		WS_ReleaseP(ctx->ws, buf);
		VERR(ctx, "%s (unknown error code)%s", msg, post);
	}
	else if (ret == PCRE2_ERROR_NOMEMORY) {
		/* Truncated, but NUL-terminated within the buffer. */
		WS_Release(ctx->ws, strlen(buf) + 1);
		VERR(ctx, "%s: %s (truncated error message)%s", msg,
		     buf, post);
	}
	else if (ret < 0) {
		/* Not a documented return value; don't trust buf. */
		WS_Release(ctx->ws, 0);
		VERR(ctx, "%s: libpcre2 error code %d%s", msg, errcode,
		     post);
	}
	else {
		/* ret is the message length without the terminating NUL. */
		WS_Release(ctx->ws, (unsigned)ret + 1);
		VERR(ctx, "%s: %s%s", msg, buf, post);
	}
	WS_Reset(ctx->ws, snap);
}
/* OR pcre2_opt into *options when vmod_opt is true; otherwise do nothing. */
static inline void
set_opt(uint32_t *options, VCL_BOOL vmod_opt, uint32_t pcre2_opt)
{
	if (!vmod_opt)
		return;
	*options = *options | pcre2_opt;
}
/*
 * OR into *options the PCRE2 compile option bit for every boolean in
 * COMPILE_FLAGS that is true. Bits already set in *options are kept.
 */
static inline void
set_compile_flags(uint32_t *options, COMPILE_FLAGS)
{
	uint32_t bits = 0;

	/* XXX check illegal combinations such as never_ucp && ucp ... ? */
#define ADD_IF(flag, bit) do { if (flag) bits |= (bit); } while (0)
	ADD_IF(anchored, PCRE2_ANCHORED);
	ADD_IF(allow_empty_class, PCRE2_ALLOW_EMPTY_CLASS);
	ADD_IF(alt_bsux, PCRE2_ALT_BSUX);
	ADD_IF(alt_circumflex, PCRE2_ALT_CIRCUMFLEX);
	ADD_IF(alt_verbnames, PCRE2_ALT_VERBNAMES);
	ADD_IF(caseless, PCRE2_CASELESS);
	ADD_IF(dollar_endonly, PCRE2_DOLLAR_ENDONLY);
	ADD_IF(dotall, PCRE2_DOTALL);
	ADD_IF(dupnames, PCRE2_DUPNAMES);
	ADD_IF(extended, PCRE2_EXTENDED);
	ADD_IF(firstline, PCRE2_FIRSTLINE);
	ADD_IF(match_unset_backref, PCRE2_MATCH_UNSET_BACKREF);
	ADD_IF(multiline, PCRE2_MULTILINE);
	ADD_IF(never_backslash_c, PCRE2_NEVER_BACKSLASH_C);
	ADD_IF(never_ucp, PCRE2_NEVER_UCP);
	ADD_IF(never_utf, PCRE2_NEVER_UTF);
	ADD_IF(no_auto_capture, PCRE2_NO_AUTO_CAPTURE);
	ADD_IF(no_auto_possess, PCRE2_NO_AUTO_POSSESS);
	ADD_IF(no_dotstar_anchor, PCRE2_NO_DOTSTAR_ANCHOR);
	ADD_IF(no_start_optimize, PCRE2_NO_START_OPTIMIZE);
	ADD_IF(no_utf_check, PCRE2_NO_UTF_CHECK);
	ADD_IF(ucp, PCRE2_UCP);
	ADD_IF(ungreedy, PCRE2_UNGREEDY);
	ADD_IF(use_offset_limit, PCRE2_USE_OFFSET_LIMIT);
	ADD_IF(utf, PCRE2_UTF);
#undef ADD_IF

	*options |= bits;
}
/*
 * Validate the compile-context options, create a pcre2 compile context
 * configured from them, and OR the compile flags into *options.
 *
 * - parens_nest_limit must fit in a uint32_t, max_pattern_length must
 *   be >= 0; either is only applied when non-zero.
 * - bsrs, when non-NULL, must be "ANYCRLF" or "UNICODE".
 * - newlines, when non-NULL, must be one of "CR", "LF", "CRLF",
 *   "ANYCRLF" or "ANY".
 * - locale, when non-NULL, is made the LC_CTYPE locale just long enough
 *   to build pcre2 character tables; the previous locale is restored.
 *
 * context and caller are only used to compose error messages.
 *
 * Returns the new compile context, or NULL after reporting the error via
 * VERR. The context is released on every failure path; a returned
 * context is not freed by this function.
 *
 * NOTE(review): the tables from pcre2_maketables() are handed to the
 * context and never freed here -- confirm who owns them.
 */
pcre2_compile_context *
get_compile_opts(VRT_CTX, COMPILE_CTX_OPTS, COMPILE_FLAGS, uint32_t *options,
		 const char * restrict const context,
		 const char * restrict const caller)
{
	pcre2_compile_context *ccontext;
	uint32_t val;

	if (!check_uint32_range(ctx, parens_nest_limit, "parens_nest_limit",
				context, caller))
		return NULL;
	if (max_pattern_length < 0) {
		VERR(ctx, "max_pattern_len (%lld) must be >= 0 in %s%s",
		     (long long)max_pattern_length, context, caller);
		return NULL;
	}

	if ((ccontext = pcre2_compile_context_create(NULL)) == NULL) {
		VERR(ctx, "failed to create compile context in %s%s", context,
		     caller);
		return NULL;
	}

	SET_CTX_PARAM(ccontext, max_pattern_length, PCRE2_SIZE);
	SET_CTX_PARAM(ccontext, parens_nest_limit, uint32_t);

	if (bsrs != NULL) {
		if (strcmp("ANYCRLF", bsrs) == 0)
			val = PCRE2_BSR_ANYCRLF;
		else if (strcmp("UNICODE", bsrs) == 0)
			val = PCRE2_BSR_UNICODE;
		else
			WRONG("Illegal bsr enum value");
		AZ(pcre2_set_bsr(ccontext, val));
	}

	if (locale != NULL) {
		const unsigned char *tables;
		char *cur_lc, *saved_lc;

		/*
		 * The string returned by setlocale() may be overwritten by
		 * a later setlocale() call, so keep a private copy to
		 * restore from.
		 */
		cur_lc = setlocale(LC_CTYPE, NULL);
		AN(cur_lc);
		saved_lc = strdup(cur_lc);
		if (saved_lc == NULL) {
			VERRNOMEM(ctx, "saving current locale in %s%s",
				  context, caller);
			pcre2_compile_context_free(ccontext);
			return NULL;
		}

		if (setlocale(LC_CTYPE, locale) == NULL) {
			AN(setlocale(LC_CTYPE, saved_lc));
			free(saved_lc);
			VERR(ctx, "Cannot set locale %s in %s%s", locale,
			     context, caller);
			pcre2_compile_context_free(ccontext);
			return NULL;
		}

		tables = pcre2_maketables(NULL);
		/* Restore the previous locale whether or not that worked. */
		AN(setlocale(LC_CTYPE, saved_lc));
		free(saved_lc);
		if (tables == NULL) {
			VERRNOMEM(ctx, "Creating tables for locale %s in %s%s",
				  locale, context, caller);
			pcre2_compile_context_free(ccontext);
			return NULL;
		}
		AZ(pcre2_set_character_tables(ccontext, tables));
	}

	if (newlines != NULL) {
		if (strcmp("CR", newlines) == 0)
			val = PCRE2_NEWLINE_CR;
		else if (strcmp("LF", newlines) == 0)
			val = PCRE2_NEWLINE_LF;
		else if (strcmp("CRLF", newlines) == 0)
			val = PCRE2_NEWLINE_CRLF;
		else if (strcmp("ANYCRLF", newlines) == 0)
			val = PCRE2_NEWLINE_ANYCRLF;
		else if (strcmp("ANY", newlines) == 0)
			val = PCRE2_NEWLINE_ANY;
		else
			WRONG("Illegal newline enum value");
		AZ(pcre2_set_newline(ccontext, val));
	}

	set_compile_flags(options, COMPILE_FLAGS_PARAMS);
	return ccontext;
}
/*
 * OR into *options the PCRE2 match and substitute option bit for every
 * boolean in MATCH_FLAGS and SUB_OPTS that is true. Bits already set in
 * *options are kept.
 */
static inline void
set_match_flags(uint32_t *options, MATCH_FLAGS, SUB_OPTS)
{
	uint32_t bits = 0;

#define ADD_IF(flag, bit) do { if (flag) bits |= (bit); } while (0)
	/* match options */
	ADD_IF(anchored, PCRE2_ANCHORED);
	ADD_IF(notbol, PCRE2_NOTBOL);
	ADD_IF(noteol, PCRE2_NOTEOL);
	ADD_IF(notempty, PCRE2_NOTEMPTY);
	ADD_IF(notempty_atstart, PCRE2_NOTEMPTY_ATSTART);
	ADD_IF(no_jit, PCRE2_NO_JIT);
	ADD_IF(no_utf_check, PCRE2_NO_UTF_CHECK);
	/* substitution options */
	ADD_IF(suball, PCRE2_SUBSTITUTE_GLOBAL);
	ADD_IF(sub_extended, PCRE2_SUBSTITUTE_EXTENDED);
	ADD_IF(unknown_unset, PCRE2_SUBSTITUTE_UNKNOWN_UNSET);
	ADD_IF(unset_empty, PCRE2_SUBSTITUTE_UNSET_EMPTY);
#undef ADD_IF

	*options |= bits;
}
/*
 * Release a struct match_call together with the pcre2 match and compile
 * contexts it holds, if any. A NULL priv is ignored. get_match_opts()
 * installs this function as the free callback of its vmod_priv.
 */
static void
match_call_free(void *priv)
{
	struct match_call *call;

	if (priv != NULL) {
		CAST_OBJ(call, priv, VMOD_PCRE2_MATCH_CALL_MAGIC);
		if (call->mctx != NULL)
			pcre2_match_context_free(call->mctx);
		if (call->cctx != NULL)
			pcre2_compile_context_free(call->cctx);
		FREE_OBJ(call);
	}
}
/*
 * Return the match options cached in priv, creating them on first use.
 *
 * If priv->priv is already set it is returned as is, and the option
 * arguments are ignored. Otherwise the three limits are range-checked, a
 * pcre2 match context is created and configured with the non-zero limits,
 * and a struct match_call holding that context and the match/substitute
 * option bits is allocated and stored in priv, with match_call_free() as
 * its free function.
 *
 * context and caller are only used to compose error messages.
 *
 * Returns NULL after reporting the error via VERR if a limit is out of
 * range or an allocation fails; nothing is left allocated in that case.
 */
struct match_call *
get_match_opts(VRT_CTX, struct vmod_priv *priv, MATCH_CTX_OPTS, MATCH_FLAGS,
	       SUB_OPTS, const char *context, const char *caller)
{
	struct match_call *match_opts;
	pcre2_match_context *mctx;

	if (priv->priv != NULL) {
		CAST_OBJ(match_opts, priv->priv, VMOD_PCRE2_MATCH_CALL_MAGIC);
		return match_opts;
	}

	if (!check_uint32_range(ctx, match_limit, "match_limit", context,
				caller))
		return NULL;
	if (!check_uint32_range(ctx, offset_limit, "offset_limit", context,
				caller))
		return NULL;
	if (!check_uint32_range(ctx, recursion_limit, "recursion_limit",
				context, caller))
		return NULL;

	if ((mctx = pcre2_match_context_create(NULL)) == NULL) {
		VERRNOMEM(ctx, "creating match context in %s%s", context,
			  caller);
		return NULL;
	}
	SET_CTX_PARAM(mctx, match_limit, uint32_t);
	SET_CTX_PARAM(mctx, offset_limit, uint32_t);
	SET_CTX_PARAM(mctx, recursion_limit, uint32_t);

	ALLOC_OBJ(match_opts, VMOD_PCRE2_MATCH_CALL_MAGIC);
	if (match_opts == NULL) {
		/* Nobody else references the context yet; don't leak it. */
		pcre2_match_context_free(mctx);
		VERRNOMEM(ctx, "allocating call-scoped match options in %s%s",
			  context, caller);
		return NULL;
	}
	match_opts->mctx = mctx;
	set_match_flags(&match_opts->match_options, MATCH_SUB_FLAGS_PARAMS);

	priv->priv = match_opts;
	priv->free = match_call_free;
	priv->len = sizeof(*match_opts);
	return match_opts;
}
/*
 * Compile the NUL-terminated pattern with the given compile options and
 * compile context, and optionally JIT-compile the result.
 *
 * options holds pcre2_compile() option bits only. They are not passed to
 * pcre2_jit_compile(), which takes its own, differently numbered option
 * bits; JIT compilation always uses PCRE2_JIT_COMPLETE.
 *
 * context and caller are only used to compose error messages, which are
 * formatted in ctx->ws; the workspace is reset afterwards.
 *
 * Returns the compiled code, or NULL after reporting the error. On JIT
 * failure the compiled code is freed before returning NULL.
 */
pcre2_code *
compile(VRT_CTX, pcre2_compile_context * restrict const cctx,
	VCL_STRING const restrict pattern, uint32_t options, int do_jit,
	const char * const restrict context, const char * const restrict caller)
{
	pcre2_code *code;
	int err_code = 0;
	PCRE2_SIZE err_offset = 0;

	/* XXX set the length via parameter */
	code = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
			     options, &err_code, &err_offset, cctx);
	if (code == NULL) {
		char *msg, *offset_msg;
		uintptr_t snap = WS_Snapshot(ctx->ws);

		/* Fall back to empty strings if the workspace is exhausted. */
		if ((msg = WS_Printf(ctx->ws, "Cannot compile '%s' in %s%s",
				     pattern, context, caller)) == NULL)
			msg = "";
		if ((offset_msg = WS_Printf(ctx->ws, " at offset %zu",
					    err_offset)) == NULL)
			offset_msg = "";
		report_pcre2_err(ctx, err_code, msg, offset_msg);
		WS_Reset(ctx->ws, snap);
		return NULL;
	}

	if (do_jit) {
		int ret;

		/* XXX disable via param */
		/* XXX set complete or soft/hard partial via param */
		ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
		if (ret != 0) {
			char *msg;
			uintptr_t snap = WS_Snapshot(ctx->ws);

			if ((msg = WS_Printf(ctx->ws, "Cannot jit-compile "
					     "'%s' in %s%s", pattern, context,
					     caller)) == NULL)
				msg = "";
			report_pcre2_err(ctx, ret, msg, "");
			WS_Reset(ctx->ws, snap);
			/* The caller only sees NULL, so release the code. */
			pcre2_code_free(code);
			return NULL;
		}
	}
	return code;
}
......@@ -27,123 +27,10 @@
#include "config.h"
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <locale.h>
#include "vcl.h"
#include "cache/cache.h"
#include "vrt.h"
#include "vas.h"
#include "vdef.h"
#include "vmod_pcre2.h"
#include "vcc_if.h"
#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"
#define ERR(ctx, msg) \
errmsg((ctx), "vmod pcre2 error: " msg)
#define VERR(ctx, fmt, ...) \
errmsg((ctx), "vmod pcre2 error: " fmt, __VA_ARGS__)
#define VERRNOMEM(ctx, fmt, ...) \
VERR((ctx), fmt ", out of space", __VA_ARGS__)
#define ERRNOMEM(ctx, msg) \
ERR((ctx), msg ", out of space")
#define INIT(ctx) (((ctx)->method & VCL_MET_INIT) != 0)
#define SET_CTX_PARAM(ctx, param, type) do { \
if ((param) != 0) AZ(pcre2_set_##param((ctx), (type)(param))); \
} while(0)
/*
 * Parameter declaration list covering every compile option: the compile
 * context options and the boolean compile flags together.
 */
#define COMPILE_OPTS \
VCL_BOOL allow_empty_class, VCL_BOOL anchored, VCL_ENUM bsrs, \
VCL_BOOL alt_bsux, VCL_BOOL alt_circumflex, \
VCL_BOOL alt_verbnames, VCL_BOOL caseless, \
VCL_BOOL dollar_endonly, VCL_BOOL dotall, \
VCL_BOOL dupnames, VCL_BOOL extended, VCL_BOOL firstline, \
VCL_STRING locale, VCL_BOOL match_unset_backref, \
VCL_INT max_pattern_length, VCL_BOOL multiline, \
VCL_BOOL never_backslash_c, VCL_BOOL never_ucp, \
VCL_BOOL never_utf, VCL_ENUM newlines, \
VCL_BOOL no_auto_capture, VCL_BOOL no_auto_possess, \
VCL_BOOL no_dotstar_anchor, VCL_BOOL no_start_optimize, \
VCL_BOOL no_utf_check, VCL_INT parens_nest_limit, \
VCL_BOOL ucp, VCL_BOOL ungreedy, \
VCL_BOOL use_offset_limit, VCL_BOOL utf
/* Parameter declarations for the options set on a pcre2 compile context. */
#define COMPILE_CTX_OPTS \
VCL_ENUM bsrs, VCL_STRING locale, VCL_INT max_pattern_length, \
VCL_ENUM newlines, VCL_INT parens_nest_limit
/* Argument list matching COMPILE_CTX_OPTS, in the same order. */
#define COMPILE_CTX_PARAMS \
bsrs, locale, max_pattern_length, newlines, parens_nest_limit
/* Parameter declarations for the boolean compile flags. */
#define COMPILE_FLAGS \
VCL_BOOL allow_empty_class, VCL_BOOL anchored, VCL_BOOL alt_bsux, \
VCL_BOOL alt_circumflex, VCL_BOOL alt_verbnames, \
VCL_BOOL caseless, VCL_BOOL dollar_endonly, \
VCL_BOOL dotall, VCL_BOOL dupnames, VCL_BOOL extended, \
VCL_BOOL firstline, VCL_BOOL match_unset_backref, \
VCL_BOOL multiline, VCL_BOOL never_backslash_c, \
VCL_BOOL never_ucp, VCL_BOOL never_utf, \
VCL_BOOL no_auto_capture, VCL_BOOL no_auto_possess, \
VCL_BOOL no_dotstar_anchor, VCL_BOOL no_start_optimize, \
VCL_BOOL no_utf_check, VCL_BOOL ucp, VCL_BOOL ungreedy, \
VCL_BOOL use_offset_limit, VCL_BOOL utf
/* Argument list matching COMPILE_FLAGS, in the same order. */
#define COMPILE_FLAGS_PARAMS \
allow_empty_class, anchored, alt_bsux, alt_circumflex, \
alt_verbnames, caseless, dollar_endonly, dotall, \
dupnames, extended, firstline, match_unset_backref, \
multiline, never_backslash_c, never_ucp, never_utf, \
no_auto_capture, no_auto_possess, no_dotstar_anchor, \
no_start_optimize, no_utf_check, ucp, ungreedy, \
use_offset_limit, utf
/*
 * Parameter declaration list covering every match option: the subject
 * length, the match context limits and the boolean match flags.
 */
#define MATCH_OPTS \
VCL_INT len, VCL_BOOL anchored, VCL_INT match_limit, \
VCL_INT offset_limit, VCL_BOOL notbol, VCL_BOOL noteol, \
VCL_BOOL notempty, VCL_BOOL notempty_atstart, \
VCL_BOOL no_jit, VCL_BOOL no_utf_check, \
VCL_INT recursion_limit
/*
 * Same as MATCH_OPTS, but without the anchored and no_utf_check options,
 * which are not repeated here.
 */
#define MATCHF_OPTS \
VCL_INT len, VCL_INT match_limit, VCL_INT offset_limit, \
VCL_BOOL notbol, VCL_BOOL noteol, VCL_BOOL notempty, \
VCL_BOOL notempty_atstart, VCL_BOOL no_jit, \
VCL_INT recursion_limit
/* Parameter declarations for the limits set on a pcre2 match context. */
#define MATCH_CTX_OPTS \
VCL_INT match_limit, VCL_INT offset_limit, VCL_INT recursion_limit
/* Argument list matching MATCH_CTX_OPTS, in the same order. */
#define MATCH_CTX_PARAMS \
match_limit, offset_limit, recursion_limit
/* Parameter declarations for the boolean match flags. */
#define MATCH_FLAGS \
VCL_BOOL anchored, VCL_BOOL notbol, VCL_BOOL noteol, \
VCL_BOOL notempty, VCL_BOOL notempty_atstart, \
VCL_BOOL no_jit, VCL_BOOL no_utf_check
/*
 * Argument list for a (MATCH_FLAGS, SUB_OPTS) parameter list: the match
 * flags followed by the four substitution options. The last line must not
 * end in a backslash, or the next #define is spliced into this macro.
 */
#define MATCH_SUB_FLAGS_PARAMS						\
	anchored, notbol, noteol, notempty, notempty_atstart, no_jit,	\
	no_utf_check, suball, sub_extended, unknown_unset,		\
	unset_empty

/*
 * Same argument list for callers that have no substitution options: the
 * four SUB_OPTS positions are passed as 0.
 */
#define MATCH_FLAGS_PARAMS						\
	anchored, notbol, noteol, notempty, notempty_atstart, no_jit,	\
	no_utf_check, 0, 0, 0, 0
/* Parameter declarations for the boolean substitution options. */
#define SUB_OPTS \
VCL_BOOL suball, VCL_BOOL sub_extended, VCL_BOOL unknown_unset, \
VCL_BOOL unset_empty
struct vmod_pcre2_regex {
unsigned magic;
#define VMOD_PCRE2_REGEX_MAGIC 0x3adb2a78
......@@ -163,20 +50,6 @@ struct task {
pcre2_match_data *mdata;
};
/*
 * PRIV_CALL scope. The match context and options are used by the match
 * method and function. The compile context and options are used by the
 * match function.
 *
 * Objects of this type are allocated by get_match_opts() and released,
 * together with both contexts, by match_call_free().
 */
struct match_call {
/* Object magic, checked against VMOD_PCRE2_MATCH_CALL_MAGIC. */
unsigned magic;
#define VMOD_PCRE2_MATCH_CALL_MAGIC 0x60e5bd33
/* Match context; set by get_match_opts(). */
pcre2_match_context *mctx;
/* Compile context; may be NULL (get_match_opts() does not set it). */
pcre2_compile_context *cctx;
/* Option bits assembled by set_match_flags(). */
uint32_t match_options;
/* Compile option bits that accompany cctx. */
uint32_t compile_options;
};
enum ref_e {
NUMBERED = 0,
NAMED = 1,
......@@ -184,81 +57,6 @@ enum ref_e {
static int have_jit = 0;
/*
 * Log a printf-style error message under the SLT_VCL_Error tag. The
 * message goes to ctx->vsl when that is set, otherwise it is logged via
 * VSLv() with id 0.
 *
 * When ctx->method is VCL_MET_INIT, the message plus a newline is also
 * appended to ctx->msg (which must be non-NULL), and VRT_handling() is
 * invoked with VCL_RET_FAIL.
 */
void
errmsg(VRT_CTX, const char *fmt, ...)
{
	va_list ap;

	/* First pass over the arguments: the log entry. */
	va_start(ap, fmt);
	if (ctx->vsl == NULL)
		VSLv(SLT_VCL_Error, 0, fmt, ap);
	else
		VSLbv(ctx->vsl, SLT_VCL_Error, fmt, ap);
	va_end(ap);

	if (ctx->method != VCL_MET_INIT)
		return;

	/* Second pass: the message buffer of the context. */
	AN(ctx->msg);
	va_start(ap, fmt);
	VSB_vprintf(ctx->msg, fmt, ap);
	va_end(ap);
	VSB_putc(ctx->msg, '\n');
	VRT_handling(ctx, VCL_RET_FAIL);
}
/*
 * Report a libpcre2 error through VERR, using the text that
 * pcre2_get_error_message() supplies for errcode.
 *
 * msg is emitted before the pcre2 message and post after it. The message
 * text is written into space reserved from ctx->ws; the workspace is reset
 * to its state on entry before returning.
 *
 * If no workspace is left at all, or pcre2 returns an unexpected negative
 * value, the numeric error code is reported instead. A message that does
 * not fit is reported as truncated.
 */
static void
report_pcre2_err(VRT_CTX, int errcode, const char * const restrict msg,
		 const char * const restrict post)
{
	int ret;
	uintptr_t snap = WS_Snapshot(ctx->ws);
	unsigned bytes = WS_Reserve(ctx->ws, 0);
	char *buf = WS_Front(ctx->ws);

	if (bytes == 0) {
		/*
		 * Nothing could be reserved: pcre2 would not write to buf,
		 * so it must not be read as a string.
		 */
		WS_Release(ctx->ws, 0);
		ERRNOMEM(ctx, "allocating workspace for pcre2 error "
			 "message");
		VERR(ctx, "%s: libpcre2 error code %d%s", msg, errcode,
		     post);
		WS_Reset(ctx->ws, snap);
		return;
	}

	ret = pcre2_get_error_message(errcode, (PCRE2_UCHAR *)buf,
				      (PCRE2_SIZE)bytes);
	if (ret == PCRE2_ERROR_BADDATA) {
		WS_ReleaseP(ctx->ws, buf);
		VERR(ctx, "%s (unknown error code)%s", msg, post);
	}
	else if (ret == PCRE2_ERROR_NOMEMORY) {
		/* Truncated, but NUL-terminated within the buffer. */
		WS_Release(ctx->ws, strlen(buf) + 1);
		VERR(ctx, "%s: %s (truncated error message)%s", msg,
		     buf, post);
	}
	else if (ret < 0) {
		/* Not a documented return value; don't trust buf. */
		WS_Release(ctx->ws, 0);
		VERR(ctx, "%s: libpcre2 error code %d%s", msg, errcode,
		     post);
	}
	else {
		/* ret is the message length without the terminating NUL. */
		WS_Release(ctx->ws, (unsigned)ret + 1);
		VERR(ctx, "%s: %s%s", msg, buf, post);
	}
	WS_Reset(ctx->ws, snap);
}
/*
 * Check that limit lies in the range [0, UINT32_MAX].
 *
 * Returns 1 if it does. Otherwise reports an error via VERR that names
 * the option (name) and where it was used (context, caller), and
 * returns 0.
 */
static inline int
check_uint32_range(VRT_CTX, long long limit, const char * const restrict name,
		   const char * const restrict context,
		   const char * const restrict caller)
{
	if (limit >= 0 && limit <= UINT32_MAX)
		return 1;

	VERR(ctx, "%s (%lld) out of range in %s%s (must be >= 0 and "
	     "<= %" PRIu32 ")", name, limit, context, caller,
	     UINT32_MAX);
	return 0;
}
/* Event function */
int __match_proto__(vmod_event_f)
......@@ -301,206 +99,6 @@ ws_free(void *ptr, void *ws)
WS_Assert_Allocated(ws, ptr, 0);
}
/*
 * Release a struct match_call together with the pcre2 match and compile
 * contexts it holds, if any. A NULL priv is ignored. get_match_opts()
 * installs this function as the free callback of its vmod_priv.
 */
static void
match_call_free(void *priv)
{
	struct match_call *call;

	if (priv != NULL) {
		CAST_OBJ(call, priv, VMOD_PCRE2_MATCH_CALL_MAGIC);
		if (call->mctx != NULL)
			pcre2_match_context_free(call->mctx);
		if (call->cctx != NULL)
			pcre2_compile_context_free(call->cctx);
		FREE_OBJ(call);
	}
}
/* OR pcre2_opt into *options when vmod_opt is true; otherwise do nothing. */
static inline void
set_opt(uint32_t *options, VCL_BOOL vmod_opt, uint32_t pcre2_opt)
{
	if (!vmod_opt)
		return;
	*options = *options | pcre2_opt;
}
/*
 * OR into *options the PCRE2 compile option bit for every boolean in
 * COMPILE_FLAGS that is true. Bits already set in *options are kept.
 */
static inline void
set_compile_flags(uint32_t *options, COMPILE_FLAGS)
{
	uint32_t bits = 0;

	/* XXX check illegal combinations such as never_ucp && ucp ... ? */
#define ADD_IF(flag, bit) do { if (flag) bits |= (bit); } while (0)
	ADD_IF(anchored, PCRE2_ANCHORED);
	ADD_IF(allow_empty_class, PCRE2_ALLOW_EMPTY_CLASS);
	ADD_IF(alt_bsux, PCRE2_ALT_BSUX);
	ADD_IF(alt_circumflex, PCRE2_ALT_CIRCUMFLEX);
	ADD_IF(alt_verbnames, PCRE2_ALT_VERBNAMES);
	ADD_IF(caseless, PCRE2_CASELESS);
	ADD_IF(dollar_endonly, PCRE2_DOLLAR_ENDONLY);
	ADD_IF(dotall, PCRE2_DOTALL);
	ADD_IF(dupnames, PCRE2_DUPNAMES);
	ADD_IF(extended, PCRE2_EXTENDED);
	ADD_IF(firstline, PCRE2_FIRSTLINE);
	ADD_IF(match_unset_backref, PCRE2_MATCH_UNSET_BACKREF);
	ADD_IF(multiline, PCRE2_MULTILINE);
	ADD_IF(never_backslash_c, PCRE2_NEVER_BACKSLASH_C);
	ADD_IF(never_ucp, PCRE2_NEVER_UCP);
	ADD_IF(never_utf, PCRE2_NEVER_UTF);
	ADD_IF(no_auto_capture, PCRE2_NO_AUTO_CAPTURE);
	ADD_IF(no_auto_possess, PCRE2_NO_AUTO_POSSESS);
	ADD_IF(no_dotstar_anchor, PCRE2_NO_DOTSTAR_ANCHOR);
	ADD_IF(no_start_optimize, PCRE2_NO_START_OPTIMIZE);
	ADD_IF(no_utf_check, PCRE2_NO_UTF_CHECK);
	ADD_IF(ucp, PCRE2_UCP);
	ADD_IF(ungreedy, PCRE2_UNGREEDY);
	ADD_IF(use_offset_limit, PCRE2_USE_OFFSET_LIMIT);
	ADD_IF(utf, PCRE2_UTF);
#undef ADD_IF

	*options |= bits;
}
/*
 * Validate the compile-context options, create a pcre2 compile context
 * configured from them, and OR the compile flags into *options.
 *
 * - parens_nest_limit must fit in a uint32_t, max_pattern_length must
 *   be >= 0; either is only applied when non-zero.
 * - bsrs, when non-NULL, must be "ANYCRLF" or "UNICODE".
 * - newlines, when non-NULL, must be one of "CR", "LF", "CRLF",
 *   "ANYCRLF" or "ANY".
 * - locale, when non-NULL, is made the LC_CTYPE locale just long enough
 *   to build pcre2 character tables; the previous locale is restored.
 *
 * context and caller are only used to compose error messages.
 *
 * Returns the new compile context, or NULL after reporting the error via
 * VERR. The context is released on every failure path; a returned
 * context is not freed by this function.
 *
 * NOTE(review): the tables from pcre2_maketables() are handed to the
 * context and never freed here -- confirm who owns them.
 */
static pcre2_compile_context *
get_compile_opts(VRT_CTX, COMPILE_CTX_OPTS, COMPILE_FLAGS, uint32_t *options,
		 const char * restrict const context,
		 const char * restrict const caller)
{
	pcre2_compile_context *ccontext;
	uint32_t val;

	if (!check_uint32_range(ctx, parens_nest_limit, "parens_nest_limit",
				context, caller))
		return NULL;
	if (max_pattern_length < 0) {
		VERR(ctx, "max_pattern_len (%lld) must be >= 0 in %s%s",
		     (long long)max_pattern_length, context, caller);
		return NULL;
	}

	if ((ccontext = pcre2_compile_context_create(NULL)) == NULL) {
		VERR(ctx, "failed to create compile context in %s%s", context,
		     caller);
		return NULL;
	}

	SET_CTX_PARAM(ccontext, max_pattern_length, PCRE2_SIZE);
	SET_CTX_PARAM(ccontext, parens_nest_limit, uint32_t);

	if (bsrs != NULL) {
		if (strcmp("ANYCRLF", bsrs) == 0)
			val = PCRE2_BSR_ANYCRLF;
		else if (strcmp("UNICODE", bsrs) == 0)
			val = PCRE2_BSR_UNICODE;
		else
			WRONG("Illegal bsr enum value");
		AZ(pcre2_set_bsr(ccontext, val));
	}

	if (locale != NULL) {
		const unsigned char *tables;
		char *cur_lc, *saved_lc;

		/*
		 * The string returned by setlocale() may be overwritten by
		 * a later setlocale() call, so keep a private copy to
		 * restore from.
		 */
		cur_lc = setlocale(LC_CTYPE, NULL);
		AN(cur_lc);
		saved_lc = strdup(cur_lc);
		if (saved_lc == NULL) {
			VERRNOMEM(ctx, "saving current locale in %s%s",
				  context, caller);
			pcre2_compile_context_free(ccontext);
			return NULL;
		}

		if (setlocale(LC_CTYPE, locale) == NULL) {
			AN(setlocale(LC_CTYPE, saved_lc));
			free(saved_lc);
			VERR(ctx, "Cannot set locale %s in %s%s", locale,
			     context, caller);
			pcre2_compile_context_free(ccontext);
			return NULL;
		}

		tables = pcre2_maketables(NULL);
		/* Restore the previous locale whether or not that worked. */
		AN(setlocale(LC_CTYPE, saved_lc));
		free(saved_lc);
		if (tables == NULL) {
			VERRNOMEM(ctx, "Creating tables for locale %s in %s%s",
				  locale, context, caller);
			pcre2_compile_context_free(ccontext);
			return NULL;
		}
		AZ(pcre2_set_character_tables(ccontext, tables));
	}

	if (newlines != NULL) {
		if (strcmp("CR", newlines) == 0)
			val = PCRE2_NEWLINE_CR;
		else if (strcmp("LF", newlines) == 0)
			val = PCRE2_NEWLINE_LF;
		else if (strcmp("CRLF", newlines) == 0)
			val = PCRE2_NEWLINE_CRLF;
		else if (strcmp("ANYCRLF", newlines) == 0)
			val = PCRE2_NEWLINE_ANYCRLF;
		else if (strcmp("ANY", newlines) == 0)
			val = PCRE2_NEWLINE_ANY;
		else
			WRONG("Illegal newline enum value");
		AZ(pcre2_set_newline(ccontext, val));
	}

	set_compile_flags(options, COMPILE_FLAGS_PARAMS);
	return ccontext;
}
/*
 * Create a pcre2 match context configured with the given limits.
 *
 * Each limit must fit in a uint32_t and is only applied when non-zero.
 * The limits are checked in the order match_limit, offset_limit,
 * recursion_limit, stopping at the first one that is out of range.
 * context and caller are only used to compose error messages.
 *
 * Returns the new context, or NULL after reporting the error via VERR.
 */
static pcre2_match_context *
get_match_context(VRT_CTX, MATCH_CTX_OPTS, const char * restrict const context,
		  const char * restrict const caller)
{
	pcre2_match_context *new_ctx;

	if (!check_uint32_range(ctx, match_limit, "match_limit", context,
				caller)
	    || !check_uint32_range(ctx, offset_limit, "offset_limit", context,
				   caller)
	    || !check_uint32_range(ctx, recursion_limit, "recursion_limit",
				   context, caller))
		return NULL;

	new_ctx = pcre2_match_context_create(NULL);
	if (new_ctx == NULL) {
		VERRNOMEM(ctx, "creating match context in %s%s", context,
			  caller);
		return NULL;
	}

	SET_CTX_PARAM(new_ctx, match_limit, uint32_t);
	SET_CTX_PARAM(new_ctx, offset_limit, uint32_t);
	SET_CTX_PARAM(new_ctx, recursion_limit, uint32_t);
	return new_ctx;
}
/*
 * OR into *options the PCRE2 match and substitute option bit for every
 * boolean in MATCH_FLAGS and SUB_OPTS that is true. Bits already set in
 * *options are kept.
 */
static inline void
set_match_flags(uint32_t *options, MATCH_FLAGS, SUB_OPTS)
{
	uint32_t bits = 0;

#define ADD_IF(flag, bit) do { if (flag) bits |= (bit); } while (0)
	/* match options */
	ADD_IF(anchored, PCRE2_ANCHORED);
	ADD_IF(notbol, PCRE2_NOTBOL);
	ADD_IF(noteol, PCRE2_NOTEOL);
	ADD_IF(notempty, PCRE2_NOTEMPTY);
	ADD_IF(notempty_atstart, PCRE2_NOTEMPTY_ATSTART);
	ADD_IF(no_jit, PCRE2_NO_JIT);
	ADD_IF(no_utf_check, PCRE2_NO_UTF_CHECK);
	/* substitution options */
	ADD_IF(suball, PCRE2_SUBSTITUTE_GLOBAL);
	ADD_IF(sub_extended, PCRE2_SUBSTITUTE_EXTENDED);
	ADD_IF(unknown_unset, PCRE2_SUBSTITUTE_UNKNOWN_UNSET);
	ADD_IF(unset_empty, PCRE2_SUBSTITUTE_UNSET_EMPTY);
#undef ADD_IF

	*options |= bits;
}
/*
 * Return the match options cached in priv, creating them on first use.
 *
 * If priv->priv is already set it is returned as is, and the option
 * arguments are ignored. Otherwise a match context is obtained from
 * get_match_context(), and a struct match_call holding that context and
 * the match/substitute option bits is allocated and stored in priv, with
 * match_call_free() as its free function.
 *
 * context and caller are only used to compose error messages.
 *
 * Returns NULL after the error has been reported if the match context
 * cannot be created or the allocation fails; nothing is left allocated
 * in that case.
 */
static inline struct match_call *
get_match_opts(VRT_CTX, struct vmod_priv *priv, MATCH_CTX_OPTS, MATCH_FLAGS,
	       SUB_OPTS, const char *context, const char *caller)
{
	struct match_call *match_opts;
	pcre2_match_context *mctx;

	if (priv->priv != NULL) {
		CAST_OBJ(match_opts, priv->priv, VMOD_PCRE2_MATCH_CALL_MAGIC);
		return match_opts;
	}

	if ((mctx = get_match_context(ctx, MATCH_CTX_PARAMS, context, caller))
	    == NULL)
		return NULL;

	ALLOC_OBJ(match_opts, VMOD_PCRE2_MATCH_CALL_MAGIC);
	if (match_opts == NULL) {
		/* Nobody else references the context yet; don't leak it. */
		pcre2_match_context_free(mctx);
		VERRNOMEM(ctx, "allocating call-scoped match options in %s%s",
			  context, caller);
		return NULL;
	}
	match_opts->mctx = mctx;
	set_match_flags(&match_opts->match_options, MATCH_SUB_FLAGS_PARAMS);

	priv->priv = match_opts;
	priv->free = match_call_free;
	priv->len = sizeof(*match_opts);
	return match_opts;
}
static inline struct task *
get_task(VRT_CTX, struct vmod_priv *priv_task,
const char * const restrict context,
......@@ -546,55 +144,6 @@ get_task(VRT_CTX, struct vmod_priv *priv_task,
return match_task;
}
/*
 * Compile the NUL-terminated pattern with the given compile options and
 * compile context, and optionally JIT-compile the result.
 *
 * options holds pcre2_compile() option bits only. They are not passed to
 * pcre2_jit_compile(), which takes its own, differently numbered option
 * bits; JIT compilation always uses PCRE2_JIT_COMPLETE.
 *
 * context and caller are only used to compose error messages, which are
 * formatted in ctx->ws; the workspace is reset afterwards.
 *
 * Returns the compiled code, or NULL after reporting the error. On JIT
 * failure the compiled code is freed before returning NULL.
 */
static pcre2_code *
compile(VRT_CTX, pcre2_compile_context * restrict const cctx,
	VCL_STRING const restrict pattern, uint32_t options, int do_jit,
	const char * const restrict context, const char * const restrict caller)
{
	pcre2_code *code;
	int err_code = 0;
	PCRE2_SIZE err_offset = 0;

	/* XXX set the length via parameter */
	code = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
			     options, &err_code, &err_offset, cctx);
	if (code == NULL) {
		char *msg, *offset_msg;
		uintptr_t snap = WS_Snapshot(ctx->ws);

		/* Fall back to empty strings if the workspace is exhausted. */
		if ((msg = WS_Printf(ctx->ws, "Cannot compile '%s' in %s%s",
				     pattern, context, caller)) == NULL)
			msg = "";
		if ((offset_msg = WS_Printf(ctx->ws, " at offset %zu",
					    err_offset)) == NULL)
			offset_msg = "";
		report_pcre2_err(ctx, err_code, msg, offset_msg);
		WS_Reset(ctx->ws, snap);
		return NULL;
	}

	if (do_jit) {
		int ret;

		/* XXX disable via param */
		/* XXX set complete or soft/hard partial via param */
		ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
		if (ret != 0) {
			char *msg;
			uintptr_t snap = WS_Snapshot(ctx->ws);

			if ((msg = WS_Printf(ctx->ws, "Cannot jit-compile "
					     "'%s' in %s%s", pattern, context,
					     caller)) == NULL)
				msg = "";
			report_pcre2_err(ctx, ret, msg, "");
			WS_Reset(ctx->ws, snap);
			/* The caller only sees NULL, so release the code. */
			pcre2_code_free(code);
			return NULL;
		}
	}
	return code;
}
static inline VCL_BOOL
match(VRT_CTX, pcre2_code * restrict const code,
VCL_STRING restrict const subject, VCL_INT len, const uint32_t options,
......
/*-
* Copyright 2017 UPLEX - Nils Goroll Systemoptimierung
* All rights reserved.
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include "vcl.h"
#include "cache/cache.h"
#include "vrt.h"
#include "vas.h"
#include "vdef.h"
#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"
#define ERR(ctx, msg) \
errmsg((ctx), "vmod pcre2 error: " msg)
#define VERR(ctx, fmt, ...) \
errmsg((ctx), "vmod pcre2 error: " fmt, __VA_ARGS__)
#define VERRNOMEM(ctx, fmt, ...) \
VERR((ctx), fmt ", out of space", __VA_ARGS__)
#define ERRNOMEM(ctx, msg) \
ERR((ctx), msg ", out of space")
/*
 * Parameter declaration list covering every compile option: the compile
 * context options and the boolean compile flags together.
 */
#define COMPILE_OPTS \
VCL_BOOL allow_empty_class, VCL_BOOL anchored, VCL_ENUM bsrs, \
VCL_BOOL alt_bsux, VCL_BOOL alt_circumflex, \
VCL_BOOL alt_verbnames, VCL_BOOL caseless, \
VCL_BOOL dollar_endonly, VCL_BOOL dotall, \
VCL_BOOL dupnames, VCL_BOOL extended, VCL_BOOL firstline, \
VCL_STRING locale, VCL_BOOL match_unset_backref, \
VCL_INT max_pattern_length, VCL_BOOL multiline, \
VCL_BOOL never_backslash_c, VCL_BOOL never_ucp, \
VCL_BOOL never_utf, VCL_ENUM newlines, \
VCL_BOOL no_auto_capture, VCL_BOOL no_auto_possess, \
VCL_BOOL no_dotstar_anchor, VCL_BOOL no_start_optimize, \
VCL_BOOL no_utf_check, VCL_INT parens_nest_limit, \
VCL_BOOL ucp, VCL_BOOL ungreedy, \
VCL_BOOL use_offset_limit, VCL_BOOL utf
/*
 * Parameter declarations for the options set on a pcre2 compile context;
 * part of the signature of get_compile_opts().
 */
#define COMPILE_CTX_OPTS \
VCL_ENUM bsrs, VCL_STRING locale, VCL_INT max_pattern_length, \
VCL_ENUM newlines, VCL_INT parens_nest_limit
/* Argument list matching COMPILE_CTX_OPTS, in the same order. */
#define COMPILE_CTX_PARAMS \
bsrs, locale, max_pattern_length, newlines, parens_nest_limit
/*
 * Parameter declarations for the boolean compile flags; part of the
 * signature of get_compile_opts().
 */
#define COMPILE_FLAGS \
VCL_BOOL allow_empty_class, VCL_BOOL anchored, VCL_BOOL alt_bsux, \
VCL_BOOL alt_circumflex, VCL_BOOL alt_verbnames, \
VCL_BOOL caseless, VCL_BOOL dollar_endonly, \
VCL_BOOL dotall, VCL_BOOL dupnames, VCL_BOOL extended, \
VCL_BOOL firstline, VCL_BOOL match_unset_backref, \
VCL_BOOL multiline, VCL_BOOL never_backslash_c, \
VCL_BOOL never_ucp, VCL_BOOL never_utf, \
VCL_BOOL no_auto_capture, VCL_BOOL no_auto_possess, \
VCL_BOOL no_dotstar_anchor, VCL_BOOL no_start_optimize, \
VCL_BOOL no_utf_check, VCL_BOOL ucp, VCL_BOOL ungreedy, \
VCL_BOOL use_offset_limit, VCL_BOOL utf
/* Argument list matching COMPILE_FLAGS, in the same order. */
#define COMPILE_FLAGS_PARAMS \
allow_empty_class, anchored, alt_bsux, alt_circumflex, \
alt_verbnames, caseless, dollar_endonly, dotall, \
dupnames, extended, firstline, match_unset_backref, \
multiline, never_backslash_c, never_ucp, never_utf, \
no_auto_capture, no_auto_possess, no_dotstar_anchor, \
no_start_optimize, no_utf_check, ucp, ungreedy, \
use_offset_limit, utf
/*
 * Parameter declaration list covering every match option: the subject
 * length, the match context limits and the boolean match flags.
 */
#define MATCH_OPTS \
VCL_INT len, VCL_BOOL anchored, VCL_INT match_limit, \
VCL_INT offset_limit, VCL_BOOL notbol, VCL_BOOL noteol, \
VCL_BOOL notempty, VCL_BOOL notempty_atstart, \
VCL_BOOL no_jit, VCL_BOOL no_utf_check, \
VCL_INT recursion_limit
/*
 * Same as MATCH_OPTS, but without the anchored and no_utf_check options,
 * which are not repeated here.
 */
#define MATCHF_OPTS \
VCL_INT len, VCL_INT match_limit, VCL_INT offset_limit, \
VCL_BOOL notbol, VCL_BOOL noteol, VCL_BOOL notempty, \
VCL_BOOL notempty_atstart, VCL_BOOL no_jit, \
VCL_INT recursion_limit
/*
 * Parameter declarations for the limits set on a pcre2 match context;
 * part of the signature of get_match_opts().
 */
#define MATCH_CTX_OPTS \
VCL_INT match_limit, VCL_INT offset_limit, VCL_INT recursion_limit
/* Argument list matching MATCH_CTX_OPTS, in the same order. */
#define MATCH_CTX_PARAMS \
match_limit, offset_limit, recursion_limit
/*
 * Parameter declarations for the boolean match flags; part of the
 * signature of get_match_opts().
 */
#define MATCH_FLAGS \
VCL_BOOL anchored, VCL_BOOL notbol, VCL_BOOL noteol, \
VCL_BOOL notempty, VCL_BOOL notempty_atstart, \
VCL_BOOL no_jit, VCL_BOOL no_utf_check
/*
 * Argument list for a (MATCH_FLAGS, SUB_OPTS) parameter list: the match
 * flags followed by the four substitution options.
 */
#define MATCH_SUB_FLAGS_PARAMS \
anchored, notbol, noteol, notempty, notempty_atstart, no_jit, \
no_utf_check, suball, sub_extended, unknown_unset, \
unset_empty
/*
 * Same argument list for callers that have no substitution options: the
 * four SUB_OPTS positions are passed as 0.
 */
#define MATCH_FLAGS_PARAMS \
anchored, notbol, noteol, notempty, notempty_atstart, no_jit, \
no_utf_check, 0, 0, 0, 0
/* Parameter declarations for the boolean substitution options. */
#define SUB_OPTS \
VCL_BOOL suball, VCL_BOOL sub_extended, VCL_BOOL unknown_unset, \
VCL_BOOL unset_empty
/*
 * PRIV_CALL scope. The match context and options are used by the match
 * method and function. The compile context and options are used by the
 * match function.
 *
 * Objects of this type are returned by get_match_opts(), which stores
 * them in the priv field of a struct vmod_priv.
 */
struct match_call {
/* Object magic, checked against VMOD_PCRE2_MATCH_CALL_MAGIC. */
unsigned magic;
#define VMOD_PCRE2_MATCH_CALL_MAGIC 0x60e5bd33
/* Match context; set by get_match_opts(). */
pcre2_match_context *mctx;
/* Compile context; may be NULL (get_match_opts() does not set it). */
pcre2_compile_context *cctx;
/* Match and substitute option bits; filled in by get_match_opts(). */
uint32_t match_options;
/* Compile option bits that accompany cctx. */
uint32_t compile_options;
};
/*
 * Log a printf-style error message with the SLT_VCL_Error tag. When
 * ctx->method is VCL_MET_INIT, the message is also appended to ctx->msg
 * and VRT_handling() is called with VCL_RET_FAIL. Normally invoked
 * through the ERR/VERR macros.
 */
void errmsg(VRT_CTX, const char *fmt, ...);
/*
 * Report the libpcre2 error errcode using the message text from
 * pcre2_get_error_message(), with msg before and post after it. Uses
 * ctx->ws as scratch space.
 */
void report_pcre2_err(VRT_CTX, int errcode, const char * const restrict msg,
const char * const restrict post);
/*
 * Create a pcre2 compile context from the compile-context options and OR
 * the compile flags into *options. Returns NULL after reporting an error
 * if an option is invalid or the context cannot be created. context and
 * caller are used in error messages.
 */
pcre2_compile_context *get_compile_opts(VRT_CTX, COMPILE_CTX_OPTS,
COMPILE_FLAGS, uint32_t *options,
const char * restrict const context,
const char * restrict const caller);
/*
 * Return the struct match_call stored in priv, creating and storing it
 * from the given options if priv is still empty. Returns NULL after
 * reporting an error if a limit is out of range or allocation fails.
 */
struct match_call *get_match_opts(VRT_CTX, struct vmod_priv *priv,
MATCH_CTX_OPTS, MATCH_FLAGS, SUB_OPTS,
const char *context, const char *caller);
/*
 * Compile pattern with pcre2_compile(), and additionally with
 * pcre2_jit_compile() when do_jit is non-zero. Returns NULL after
 * reporting an error if either step fails.
 */
pcre2_code *compile(VRT_CTX, pcre2_compile_context * restrict const cctx,
VCL_STRING const restrict pattern, uint32_t options,
int do_jit, const char * const restrict context,
const char * const restrict caller);
/*
 * Check that limit lies in the range [0, UINT32_MAX].
 *
 * Returns 1 if it does. Otherwise reports an error via VERR that names
 * the option (name) and where it was used (context, caller), and
 * returns 0.
 */
static inline int
check_uint32_range(VRT_CTX, long long limit, const char * const restrict name,
		   const char * const restrict context,
		   const char * const restrict caller)
{
	if (limit >= 0 && limit <= UINT32_MAX)
		return 1;

	VERR(ctx, "%s (%lld) out of range in %s%s (must be >= 0 and "
	     "<= %" PRIu32 ")", name, limit, context, caller,
	     UINT32_MAX);
	return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment