Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
D
dcs_classifier
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
uplex-varnish
dcs_classifier
Commits
2c1aaa3d
Commit
2c1aaa3d
authored
Aug 13, 2014
by
Nils Goroll
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
tabify and cleanup whitespace
parent
fc5df947
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
115 additions
and
115 deletions
+115
-115
gen_dcs_classifier.pl
src/gen_dcs_classifier.pl
+115
-115
No files found.
src/gen_dcs_classifier.pl
View file @
2c1aaa3d
...
...
@@ -40,8 +40,8 @@ use Fcntl qw(SEEK_SET);
# use Carp::Assert;
sub
assert
($;$) {
unless
(
$_
[
0
])
{
require
Carp
;
Carp::
confess
(
_fail_msg
(
$_
[
1
])
);
require
Carp
;
Carp::
confess
(
_fail_msg
(
$_
[
1
])
);
}
return
undef
;
}
...
...
@@ -105,7 +105,7 @@ $filenames[F_CHECKSUM] = 'dcs_classifier.checksum';
}
(
F_MIN
..
F_MAX
);
}
}
use
constant
{
VCL_TOP
=>
undef
...
...
@@ -143,7 +143,7 @@ sub Prefix {
}
my
$minlen
;
my
@d
=
map
{
my
@d
=
map
{
my
@split
=
split
(
//
,
$_
);
my
$l
=
scalar
(
@split
);
if
(
defined
(
$minlen
))
{
...
...
@@ -166,7 +166,7 @@ sub Prefix {
$p
.=
$c
;
$i
++
;
}
$p
}
...
...
@@ -180,14 +180,14 @@ sub Prefix {
sub
parse_token_code_gen
{
my
$varnish
=
shift
;
my
$prevpos
=
shift
;
# 0, 1 ...
my
$prev
=
shift
;
# "G", "GE", ...
my
$term
=
shift
;
# function(macro) name to check if a symbol is terminator
my
$cb
=
shift
;
# function(macro) to call for each match
my
$prevpos
=
shift
;
# 0, 1 ...
my
$prev
=
shift
;
# "G", "GE", ...
my
$term
=
shift
;
# function(macro) name to check if a symbol is terminator
my
$cb
=
shift
;
# function(macro) to call for each match
assert
((
scalar
(
@_
)
%
2
)
==
0
);
my
%
symbs
=
@_
;
# symbol => result, ...
my
%
symbs
=
@_
;
# symbol => result, ...
my
$nsymbs
=
scalar
(
keys
%
symbs
);
...
...
@@ -204,38 +204,38 @@ sub parse_token_code_gen {
}
if
(
$nsymbs
==
1
)
{
my
(
$m
,
$r
)
=
%
symbs
;
my
@cond
;
# optimize for just this one string
my
$p
;
for
(
$p
=
$pos
;
$p
<
length
(
$m
);
$p
++
)
{
push
@cond
,
'(m['
.
$p
.
"] == '"
.
substr
(
$m
,
$p
,
1
)
.
"')"
;
}
push
@cond
,
'('
.
$term
.
'(m['
.
$p
.
"]))"
;
_VCL
(
$varnish
,
VCL_TOP
,
"\t//"
.
$m
.
"\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
'if ('
.
join
(
" && "
,
@cond
)
.
') {'
.
"\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
' '
.
$cb
.
'('
.
$p
.
', '
.
$r
.
");\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
"}\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
'goto done;'
.
"\n"
);
my
(
$m
,
$r
)
=
%
symbs
;
my
@cond
;
# optimize for just this one string
my
$p
;
for
(
$p
=
$pos
;
$p
<
length
(
$m
);
$p
++
)
{
push
@cond
,
'(m['
.
$p
.
"] == '"
.
substr
(
$m
,
$p
,
1
)
.
"')"
;
}
push
@cond
,
'('
.
$term
.
'(m['
.
$p
.
"]))"
;
_VCL
(
$varnish
,
VCL_TOP
,
"\t//"
.
$m
.
"\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
'if ('
.
join
(
" && "
,
@cond
)
.
') {'
.
"\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
' '
.
$cb
.
'('
.
$p
.
', '
.
$r
.
");\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
"}\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
'goto done;'
.
"\n"
);
}
else
{
my
@case
;
my
@tocall
;
{
## terminating keys
foreach
my
$m
(
grep
{
(
length
(
$_
)
==
$pos
)
}
sort
keys
%
symbs
)
{
my
$r
=
delete
$symbs
{
$m
};
_VCL
(
$varnish
,
VCL_TOP
,
"\t//"
.
$m
.
"\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
'if ('
.
$term
.
'(m['
.
$pos
.
']))'
.
"\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t
"
.
$cb
.
'('
.
$pos
.
', '
.
$r
.
");\n"
);
}
}
{
## non-terminating keys
my
@keys
=
(
sort
keys
%
symbs
);
my
@case
;
my
@tocall
;
{
## terminating keys
foreach
my
$m
(
grep
{
(
length
(
$_
)
==
$pos
)
}
sort
keys
%
symbs
)
{
my
$r
=
delete
$symbs
{
$m
};
_VCL
(
$varnish
,
VCL_TOP
,
"\t//"
.
$m
.
"\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
'if ('
.
$term
.
'(m['
.
$pos
.
']))'
.
"\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t
"
.
$cb
.
'('
.
$pos
.
', '
.
$r
.
");\n"
);
}
}
{
## non-terminating keys
my
@keys
=
(
sort
keys
%
symbs
);
# get the longest common prefix from here
my
@prestr
=
map
(
substr
(
$_
,
$pos
),
@keys
);
...
...
@@ -256,47 +256,47 @@ sub parse_token_code_gen {
## terminating keys
foreach
my
$m
(
grep
{
(
length
(
$_
)
==
$pos
)
}
@keys
)
{
my
$r
=
delete
$symbs
{
$m
};
_VCL
(
$varnish
,
VCL_TOP
,
"\t//"
.
$m
.
"\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
'if ('
.
$term
.
'(m['
.
$pos
.
']))'
.
"\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t "
.
$cb
.
'('
.
$pos
.
', '
.
$r
.
");\n"
);
}
}
@keys
=
(
sort
keys
%
symbs
);
# hash by this char
my
%
h
;
foreach
my
$k
(
@keys
)
{
assert
(
length
(
$k
)
>
$pos
,
"length($k) > $pos"
);
push
@
{
$h
{
substr
(
$k
,
$pos
,
1
)}},
(
$k
);
}
foreach
my
$k
(
sort
keys
%
h
)
{
my
$la
=
label
(
'_'
.
$pos
.
$prev
.
$k
);
my
@me
=
@
{
$h
{
$k
}};
push
@case
,
(
"case '"
.
$k
.
"':\tgoto "
.
$la
.
";\t// "
.
join
(
", "
,
@me
));
push
@tocall
,
[
$prev
.
$k
,
\
@me
];
}
}
push
@case
,
(
"default:\tgoto done;"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
'switch (m['
.
$pos
.
"]) {\n\t"
);
_VCL
(
$varnish
,
VCL_TOP
,
join
(
"\n\t"
,
@case
)
.
"\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t}\n"
);
if
(
scalar
(
@tocall
))
{
map
{
parse_token_code_gen
(
$varnish
,
$pos
,
$_
->
[
0
],
$term
,
@keys
=
(
sort
keys
%
symbs
);
# hash by this char
my
%
h
;
foreach
my
$k
(
@keys
)
{
assert
(
length
(
$k
)
>
$pos
,
"length($k) > $pos"
);
push
@
{
$h
{
substr
(
$k
,
$pos
,
1
)}},
(
$k
);
}
foreach
my
$k
(
sort
keys
%
h
)
{
my
$la
=
label
(
'_'
.
$pos
.
$prev
.
$k
);
my
@me
=
@
{
$h
{
$k
}};
push
@case
,
(
"case '"
.
$k
.
"':\tgoto "
.
$la
.
";\t// "
.
join
(
", "
,
@me
));
push
@tocall
,
[
$prev
.
$k
,
\
@me
];
}
}
push
@case
,
(
"default:\tgoto done;"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t"
.
'switch (m['
.
$pos
.
"]) {\n\t"
);
_VCL
(
$varnish
,
VCL_TOP
,
join
(
"\n\t"
,
@case
)
.
"\n"
);
_VCL
(
$varnish
,
VCL_TOP
,
"\t}\n"
);
if
(
scalar
(
@tocall
))
{
map
{
parse_token_code_gen
(
$varnish
,
$pos
,
$_
->
[
0
],
$term
,
$cb
,
(
map
{
$_
=>
$symbs
{
$_
};
}
@
{
$_
->
[
1
]}));
}
@tocall
;
}
(
map
{
$_
=>
$symbs
{
$_
};
}
@
{
$_
->
[
1
]}));
}
@tocall
;
}
}
}
...
...
@@ -657,12 +657,12 @@ my %special_entries;
$e
[
ENTRY_ID
]
=
0
;
$e
[
ENTRY_ORDER
]
=
$line
;
$e
[
ENTRY_COMMON_MATCHMASK
]
=
0xffffffff
;
$e
[
ENTRY_MATCHMASK
]
=
0xffffffff
;
$e
[
ENTRY_MATCHMASK
]
=
0xffffffff
;
$e
[
ENTRY_INITMASK
]
=
0
;
$e
[
ENTRY_TYPE
]
=
type_enum
(
$s
);
$e
[
ENTRY_TYPE
]
=
type_enum
(
$s
);
$e
[
ENTRY_KEY
]
=
\
$s
;
$e
[
ENTRY_SUBKEYS
]
=
[]
;
$entries
[
$line
++
]
=
\
@e
;
$entries
[
$line
++
]
=
\
@e
;
}
{
my
@e
;
...
...
@@ -672,12 +672,12 @@ my %special_entries;
$e
[
ENTRY_ID
]
=
0
;
$e
[
ENTRY_ORDER
]
=
$line
;
$e
[
ENTRY_COMMON_MATCHMASK
]
=
0xffffffff
;
$e
[
ENTRY_MATCHMASK
]
=
0xffffffff
;
$e
[
ENTRY_MATCHMASK
]
=
0xffffffff
;
$e
[
ENTRY_INITMASK
]
=
0
;
$e
[
ENTRY_TYPE
]
=
type_enum
(
"Mobile Phone"
);
$e
[
ENTRY_TYPE
]
=
type_enum
(
"Mobile Phone"
);
$e
[
ENTRY_KEY
]
=
\
$s
;
$e
[
ENTRY_SUBKEYS
]
=
[]
;
$entries
[
$line
++
]
=
\
@e
;
$entries
[
$line
++
]
=
\
@e
;
}
$special_entries
{
enum_name
(
'NB_E_SPECIAL_'
,
'LIMIT'
)}
=
$line
;
}
...
...
@@ -723,10 +723,10 @@ foreach (split(/[\n\r\f]/, $$dbref)) {
$e
[
ENTRY_ID
]
=
$line
;
$e
[
ENTRY_ORDER
]
=
$line
;
$e
[
ENTRY_ACTIVE
]
=
0
;
$e
[
ENTRY_TYPE
]
=
type_enum
(
$type
);
$e
[
ENTRY_TYPE
]
=
type_enum
(
$type
);
$e
[
ENTRY_KEY
]
=
\
$key
;
$e
[
ENTRY_SUBKEYS
]
=
\
@sks
;
$e
[
ENTRY_INITMASK
]
=
0
;
$e
[
ENTRY_INITMASK
]
=
0
;
$entries
[
$line
++
]
=
\
@e
;
}
...
...
@@ -734,7 +734,7 @@ foreach (split(/[\n\r\f]/, $$dbref)) {
my
%
common_subkeys
;
{
# common_subkeys can never be negative
my
@sk_ord
=
sort
{
$subkeys_count
{
$b
}
<=>
$subkeys_count
{
$a
}
}
my
@sk_ord
=
sort
{
$subkeys_count
{
$b
}
<=>
$subkeys_count
{
$a
}
}
grep
{
$_
!~
/^!/
}
keys
%
subkeys_count
;
...
...
@@ -852,9 +852,8 @@ sub process_entry($) {
}
map
{
fixup_entry
(
$_
);
}
@entries
;
@entries
=
sort
{
$a
->
[
ENTRY_ORDER
]
<=>
$b
->
[
ENTRY_ORDER
]
}
grep
{
$_
->
[
ENTRY_ACTIVE
]
}
@entries
;
@entries
=
sort
{
$a
->
[
ENTRY_ORDER
]
<=>
$b
->
[
ENTRY_ORDER
]
}
grep
{
$_
->
[
ENTRY_ACTIVE
]
}
@entries
;
for
(
my
$i
=
0
;
$i
<=
$#entries
;
$i
++
)
{
process_entry
(
$i
);
...
...
@@ -898,7 +897,7 @@ typedef unsigned char dcs_seenmask_t;
/* values for the particular dcs db use to generate this code */
#define DCS_TYPE_COUNT $dcs_type_count
#define DCS_ENTRY_COUNT
$dcs_entry_count
#define DCS_ENTRY_COUNT
$dcs_entry_count
#define DCS_SUBKEY_COUNT $subkey_id
#define DCS_MATCHSTATE_REGMASK_SZ ((DCS_SUBKEY_COUNT / sizeof(dcs_seenmask_t)) + 1)
...
...
@@ -913,9 +912,9 @@ typedef unsigned char dcs_seenmask_t;
*/
EOF
_VCL
(
F_CLASSIFIER_H
,
VCL_TOP
,
_VCL
(
F_CLASSIFIER_H
,
VCL_TOP
,
"enum dcs_type {\n\t"
.
join
(
",\n\t"
,
join
(
",\n\t"
,
'NB_T_UNIDENTIFIED = 0'
,
sort
grep
{
$_
ne
'NB_T_UNIDENTIFIED'
}
keys
%
typeenum2nbtype
)
.
"\n};\n"
.
...
...
@@ -933,39 +932,39 @@ _VCL (F_CLASSIFIER_H, VCL_TOP, <<EOF);
* entry_id needs to be signed because we use the sign to signal negative matches
*/
typedef
int16_t
dcs_entry_id_t
;
#define
DCS_ENTRY_INDEX_MAX (INT16_MAX - 1)
#define
DCS_ENTRY_INDEX_MAX (INT16_MAX - 1)
typedef
uint32_t
dcs_matchmask_t
;
typedef
uint32_t
dcs_matchmask_t
;
typedef
uint16_t
dcs_subkey_id_t
;
#define DCS_SUBKEY_ID_MAX UINT16_MAX
/* this only needs the number of bits in dcs_entry.matchmask */
typedef
uint8_t
dcs_entry_subkey_id_t
;
#define DCS_ENTRY_SUBKEY_ID_MAX
(sizeof(dcs_matchmask_t) * 4 - 1)
#define DCS_ENTRY_SUBKEY_ID_MAX
(sizeof(dcs_matchmask_t) * 4 - 1)
struct
dcs_entry
{
#define DCS_ENTRY_MAGIC 0x74b979a7
const
uint32_t
magic
;
const
uint32_t
magic
;
const
dcs_matchmask_t
matchmask
;
const
dcs_matchmask_t
common_matchmask
;
const
dcs_entry_id_t
id
;
const
dcs_entry_id_t
index
;
/* redundant, for assertions/debugging */
const
dcs_entry_id_t
id
;
const
dcs_entry_id_t
index
;
/* redundant, for assertions/debugging */
const
short
active
;
const
enum
dcs_type
type
;
const
char
const
*
key
;
/* for debug */
const
enum
dcs_type
type
;
const
char
const
*
key
;
/* for debug */
};
struct
dcs_sk2e
{
/*
/*
* if entry id is negative, it's a negative match
* (and entry id needs to be inverted)
*/
const dcs_entry_id_t entry;
const dcs_entry_subkey_id_t entry_subkey;
};
};
struct dcs_subkey {
...
...
@@ -974,7 +973,7 @@ struct dcs_subkey {
const dcs_subkey_id_t id;
const dcs_matchmask_t common_matchmask;
const char const
*s; /* debug */
const char const *s; /* debug */
/* number of entry ids to follow */
const dcs_entry_id_t n;
...
...
@@ -1016,7 +1015,7 @@ _VCL (F_CLASSIFIER_C, VCL_TOP, <<EOF);
#include <string.h>
#ifdef DEBUG_SK_MATCH
#include <stdio.h>
#include <stdio.h>
#endif
#include "$filenames[F_CLASSIFIER_H]"
...
...
@@ -1025,7 +1024,7 @@ const char *dcs_checksum = DCS_CHECKSUM;
EOF
_VCL (F_CLASSIFIER_C, VCL_TOP,
_VCL (F_CLASSIFIER_C, VCL_TOP,
"const char * const dcs_type[DCS_TYPE_COUNT] = {\n\t".
join(",\n\t",
map { '
[
'.$_.'
]
=
"'.c_escape($typeenum2nbtype{$_}).'"' }
...
...
@@ -1116,12 +1115,12 @@ void
dcs_parse_subkey
(
struct
dcs_matchstate
*
state
,
const
char
const
*
m
)
{
EOF
parse_token_code_gen
(
F_CLASSIFIER_C
,
-
1
,
''
,
'PARSE_TERM'
,
# function(macro) name to check if a symbol is terminator
parse_token_code_gen
(
F_CLASSIFIER_C
,
-
1
,
''
,
'PARSE_TERM'
,
# function(macro) name to check if a symbol is terminator
'PARSE_REGISTER'
,
# function(macro) to call for each match
# make a hash symbol -> SUBKEY_ID
map
{
(
$_
=>
$subkeys
{
$_
}
->
[
SUBKEY_ID
]);
}
(
sort
keys
%
subkeys
));
_VCL
(
F_CLASSIFIER_C
,
undef
,
<<
'EOF'
);
done:
return
;
...
...
@@ -1146,13 +1145,13 @@ dcs_init_matchstate (struct dcs_matchstate *state) {
static
inline
int
dcs_register_subkey_match_seen
(
dcs_seenmask_t
*
mask
,
const
dcs_subkey_id_t
subkey_id
)
{
const
dcs_subkey_id_t
word
=
subkey_id
/
sizeof
(
dcs_seenmask_t
);
const
uint8_t
bit
=
subkey_id
%
sizeof
(
dcs_seenmask_t
);
const
uint8_t
bit
=
subkey_id
%
sizeof
(
dcs_seenmask_t
);
assert
(
word
<=
DCS_MATCHSTATE_REGMASK_SZ
);
if
(
mask
[
word
]
&
(
1
<<
bit
))
return
1
;
mask
[
word
]
|=
(
1
<<
bit
);
return
0
;
}
...
...
@@ -1230,7 +1229,7 @@ void
dcs_register_subkey_match
(
struct
dcs_matchstate
*
state
,
dcs_subkey_id_t
subkey_id
)
{
const
struct
dcs_subkey
*
subkey
;
const
struct
dcs_sk2e
*
sk2e
;
dcs_entry_id_t
entry_index
,
i
;
assert
(
state
->
magic
==
DCS_MATCHSTATE_MAGIC
);
...
...
@@ -1302,7 +1301,7 @@ dcs_register_subkey_match(struct dcs_matchstate *state, dcs_subkey_id_t subkey_i
((
state
->
common_matchmask
&
dcs_entry
[
entry_index
]
.
common_matchmask
)
==
dcs_entry
[
entry_index
]
.
common_matchmask
))
{
/* have got no common_matchmask or all entries hit */
if
((
state
->
min_match_entry
==
0
)
||
(
entry_index
<
state
->
min_match_entry
))
state
->
min_match_entry = entry_index
;
...
...
@@ -1344,6 +1343,7 @@ _VCL (F_TYPE_C, VCL_TOP, <<'EOF');
#include "dcs_classifier.h"
#include "dcs_type.h"
#define check_type_id(type_id, ret) \
do
{
\
if
((
type_id
<
0
)
||
(
type_id
>
(
DCS_TYPE_COUNT
-
1
)))
{
\
...
...
@@ -1364,19 +1364,19 @@ my %types_unused = %type2classenum;
_VCL
(
F_TYPE_C
,
VCL_TOP
,
"enum dcs_type_class {\n\t"
.
join
(
",\n\t"
,
join
(
",\n\t"
,
'_T_CLASS_MISSING = 0'
,
sort
keys
%
enum2class
,
'_T_CLASS_LIMIT'
)
.
"\n};\n\n"
.
"const char * const dcs_type_class_str[_T_CLASS_LIMIT] = {\n\t"
.
join
(
",\n\t"
,
join
(
",\n\t"
,
'[_T_CLASS_MISSING] = "'
.
$default_class
.
'"'
,
map { '['.$_.'] = "'.$enum2class{$_}.'"' } sort keys %enum2class
)
.
"\n};\n\n"
.
"enum dcs_type dcs_type2class[DCS_TYPE_COUNT] = {\n\t"
.
join
(
",\n\t"
,
map
{
join
(
",\n\t"
,
map
{
my
$class_enum
=
$type2classenum
{
$_
};
if
(
defined
(
$class_enum
))
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment