Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
L
libvmod-selector
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
uplex-varnish
libvmod-selector
Commits
7fc3a4fe
Commit
7fc3a4fe
authored
Sep 07, 2020
by
Geoff Simmons
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Remove the patricia interface.
parent
7f6a85f9
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
3 additions
and
980 deletions
+3
-980
Makefile.am
src/Makefile.am
+2
-6
patricia.c
src/patricia.c
+0
-433
patricia.h
src/patricia.h
+0
-75
Makefile.am
src/tests/bench/Makefile.am
+1
-5
bench.c
src/tests/bench/bench.c
+0
-461
No files found.
src/Makefile.am
View file @
7fc3a4fe
...
...
@@ -7,8 +7,6 @@ vmod_LTLIBRARIES = libvmod_selector.la
libvmod_selector_la_SOURCES
=
\
vmod_selector.c
\
patricia.h
\
patricia.c
\
qp.h
\
qp.c
\
popcnt_compat.h
\
...
...
@@ -26,11 +24,9 @@ dist_man_MANS = vmod_selector.3
@BUILD_VSC_SELECTOR@
vmod_selector.c
patricia.c
:
patricia
.h
vmod_selector.c
qp.c
:
qp.h popcnt_compat
.h
qp.c
:
qp.h popcnt_compat.h
ph.c
:
ph.h rnd.h
vmod_selector.c ph.c
:
ph.h rnd.h
vmod_selector.lo
:
$(nodist_libvmod_selector_la_SOURCES)
...
...
src/patricia.c
deleted
100644 → 0
View file @
7f6a85f9
/*-
* Copyright (c) 2018 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Inspired in part by Varnish hash_critbit.c and tarsnap's Patricia
* implementation.
*/
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "vdef.h"
#include "vas.h"
#include "miniobj.h"
#include "patricia.h"
/*
* A trie is composed of a tree of pt_y nodes and an array of strings,
* both of which are owned by a VMOD object. Both are passed in to the
* PT_* functions.
*
* idx is an index to the strings array, and the region of the string
* represented at a node is strings[idx][off] to strings[idx][off+len],
* inclusive.
*
* Terminating nulls form a part of the target string. So "foo\0" and
* "foobar\0" differ at the fourth byte, and the critical bit at that byte
* is the most significant 1-bit in 'b'.
*
* bitmask is set at the critical bit in the differing byte, and
* determines whether searches continue into leaf[0] or leaf[1].
*/
/*
 * One node of the trie.
 *
 * The node refers to the bytes of strings[idx] starting at offset off
 * and spanning len bytes. bitmask selects the bit of the byte following
 * that span that decides which of the two children a search continues
 * into; it is 0 for a node that has never been split.
 */
struct pt_y {
	unsigned	magic;		/* object tag checked by CHECK_OBJ*() */
#define PT_Y_MAGIC 0xfa564d14
	unsigned	idx;		/* index into the strings array */
	struct pt_y	*leaf[2];	/* children for bit clear / bit set */
	unsigned short	off;		/* start of this node's span */
	unsigned short	len;		/* length of this node's span */
	unsigned char	bitmask;	/* critical bit, or 0 */
};
/*
 * Lookup table filled in by PT_Init(): for a byte value v, pt_bittbl[v]
 * is the number of leading zero bits in v (8 for v == 0, 0 for
 * v >= 0x80). PT_Insert() indexes it with the XOR of two differing bytes
 * to find their most significant differing bit.
 *
 * As a static object it starts out all zero, which is what PT_Inited()
 * relies on to tell whether PT_Init() has run.
 */
static unsigned char pt_bittbl[256];
/*
 * Populate pt_bittbl. Must be called exactly once before the trie is
 * used: the leading AZ() fails if entry 0 has already been set, which is
 * the case after a previous call.
 */
void
PT_Init(void)
{
	unsigned shift, val = 0;

	AZ(pt_bittbl[0x34 ^ 0x34]);

	/*
	 * For each shift, fill the entries from the previous power of two
	 * up to (but excluding) 1 << shift with 8 - shift. Entries 0x80
	 * and above are never written and keep their initial value 0.
	 */
	for (shift = 0; shift < 8; shift++) {
		const unsigned limit = 1U << shift;

		while (val < limit) {
			pt_bittbl[val] = (unsigned char)(8 - shift);
			val++;
		}
	}

	/* Quick asserts for sanity check */
	assert(pt_bittbl[0x34 ^ 0x34] == 8);
	AZ(pt_bittbl[0xaa ^ 0x55]);
	assert(pt_bittbl[0x01 ^ 0x22] == 2);
	assert(pt_bittbl[0x10 ^ 0x0b] == 3);
}
/*
 * Return 1 if PT_Init() has filled the bit table, 0 otherwise. Entry 0
 * of the table is zero until PT_Init() sets it to 8.
 */
int
PT_Inited(void)
{
	if (pt_bittbl[0x34 ^ 0x34] == 0)
		return (0);
	return (1);
}
/*
 * Allocate a node for strings[idx] covering len bytes from offset off.
 *
 * Returns NULL with errno set to ERANGE if len does not fit in an
 * unsigned short. Otherwise errno is reset to 0 before the allocation,
 * and NULL is returned if ALLOC_OBJ() yields no object.
 */
static struct pt_y *
y_alloc(unsigned idx, unsigned short off, size_t len)
{
	struct pt_y *node;

	if (len > USHRT_MAX) {
		errno = ERANGE;
		return (NULL);
	}

	errno = 0;
	ALLOC_OBJ(node, PT_Y_MAGIC);
	if (node != NULL) {
		node->idx = idx;
		node->off = off;
		node->len = (unsigned short)len;
		/* The remaining fields are expected to start out zeroed. */
		AZ(node->leaf[0]);
		AZ(node->leaf[1]);
		AZ(node->bitmask);
	}
	return (node);
}
/*
 * Allocate a node for the tail of strings[idx] that starts at c, where b
 * is the start of the same string (so c - b is the node's offset and
 * strlen(c) its length).
 *
 * The node stores offset and length as unsigned short. The original
 * code cast the offset without checking it, so for a string longer than
 * USHRT_MAX the offset (and later the offsets computed in y_dup()) could
 * silently wrap. Reject such strings here with errno == ERANGE, the same
 * error y_alloc() reports for an over-long length.
 *
 * Returns the new node, or NULL on error.
 */
static inline struct pt_y *
y_leaf_alloc(unsigned idx, unsigned char *c, unsigned char *b)
{
	size_t off, len;

	off = (size_t)(c - b);
	len = strlen((char *)c);
	/* off + len is the total length of the string being inserted. */
	if (off > USHRT_MAX || len > USHRT_MAX - off) {
		errno = ERANGE;
		return (NULL);
	}
	return (y_alloc(idx, (unsigned short)off, len));
}
/*
 * Create a node that takes over the part of y0's span after its first
 * len bytes, together with y0's children and bitmask. len must be
 * smaller than y0->len. y0 itself is not modified.
 *
 * Returns the new node, or NULL if y_alloc() fails.
 */
static struct pt_y *
y_dup(struct pt_y *y0, unsigned short len)
{
	struct pt_y *copy;

	assert(len < y0->len);
	copy = y_alloc(y0->idx, y0->off + len, y0->len - len);
	if (copy != NULL) {
		copy->leaf[0] = y0->leaf[0];
		copy->leaf[1] = y0->leaf[1];
		copy->bitmask = y0->bitmask;
	}
	return (copy);
}
/*
 * Insert strings[idx] into the trie rooted at *root, creating the root
 * if *root is NULL.
 *
 * Returns 0 on success and -1 on failure. On failure errno is EINVAL if
 * the string is already in the trie; otherwise it is whatever the node
 * allocation helpers left in it.
 */
int
PT_Insert(struct pt_y * * restrict root, unsigned idx,
	  char * const restrict * const restrict strings)
{
	struct pt_y *node;
	unsigned char *base, *cur;

	AN(root);
	CHECK_OBJ_ORNULL(*root, PT_Y_MAGIC);
	AN(strings);
	AN(strings[idx]);

	/* Empty trie: the whole string becomes the root node. */
	if (*root == NULL) {
		*root = y_alloc(idx, 0, strlen(strings[idx]));
		return (*root == NULL ? -1 : 0);
	}

	node = *root;
	base = (unsigned char *)strings[idx];
	cur = base;
	errno = 0;
	for (;;) {
		struct pt_y *fresh, *tail;
		unsigned char *seg;
		unsigned char side;
		unsigned short n = 0;

		CHECK_OBJ_NOTNULL(node, PT_Y_MAGIC);

		/* Advance over the bytes shared with this node's span. */
		seg = (unsigned char *)(strings[node->idx] + node->off);
		while (*cur != '\0' && n < node->len && seg[n] == *cur) {
			n++;
			cur++;
		}

		if (seg[n] == '\0' && *cur == '\0') {
			/*
			 * Both strings end here, so the string to be
			 * inserted is already in the trie.
			 */
			assert(n == node->len);
			errno = EINVAL;
			return (-1);
		}

		if (n == node->len) {
			/*
			 * The node's whole span is a prefix of the new
			 * string: descend into the child selected by the
			 * critical bit, or hang a new leaf there if that
			 * slot is empty.
			 */
			side = (node->bitmask & *cur) != 0;
			assert(side < 2);
			if (node->leaf[side] == NULL) {
				fresh = y_leaf_alloc(idx, cur, base);
				if (fresh == NULL)
					return (-1);
				node->leaf[side] = fresh;
				return (0);
			}
			node = node->leaf[side];
			continue;
		}

		/*
		 * The strings diverge inside this node's span: split it.
		 * The node keeps the first n bytes; its old remainder and
		 * children move to tail, and the new string's remainder
		 * goes to fresh.
		 */
		fresh = y_leaf_alloc(idx, cur, base);
		if (fresh == NULL)
			return (-1);
		tail = y_dup(node, n);
		if (tail == NULL) {
			FREE_OBJ(fresh);
			return (-1);
		}
		node->len = n;
		node->bitmask = 0x80 >> pt_bittbl[seg[n] ^ *cur];
		assert((*cur & node->bitmask) != (seg[n] & node->bitmask));
		side = (*cur & node->bitmask) != 0;
		assert(side < 2);
		node->leaf[side] = fresh;
		node->leaf[1 - side] = tail;
		return (0);
	}
}
/*
 * Look up subject for an exact match in the trie rooted at root.
 *
 * Returns the index in strings of the matching string, or UINT_MAX if
 * there is none (including when root is NULL).
 *
 * The descent only inspects the critical bit at each node; the candidate
 * found that way is confirmed with a full strcmp().
 */
unsigned
PT_Lookup(const struct pt_y * const restrict root,
	  char * const restrict * const restrict strings,
	  const char * const restrict subject)
{
	const struct pt_y *node;
	size_t sublen;

	AN(strings);
	AN(subject);
	if (root == NULL)
		return UINT_MAX;

	sublen = strlen(subject);
	node = root;
	while (node != NULL) {
		unsigned char side;
		size_t end;

		CHECK_OBJ(node, PT_Y_MAGIC);
		end = node->off + node->len;
		/* The node's span reaches past the end of the subject. */
		if (end > sublen)
			return UINT_MAX;
		if (end == sublen && strings[node->idx][end] == '\0') {
			/* Candidate of the right length: verify it. */
			if (strcmp(subject, strings[node->idx]) == 0)
				return node->idx;
			return (UINT_MAX);
		}
		side = (node->bitmask & subject[end]) != 0;
		assert(side < 2);
		node = node->leaf[side];
	}
	return UINT_MAX;
}
/*
 * Recursive worker for PT_Prefixes(): visit the subtree at y and record
 * in match every string that ends at a visited node and is a prefix of
 * subject (of length len).
 *
 * A subtree is abandoned when the node's span extends past the subject
 * or its bytes differ from the subject. Both children are visited;
 * the bitmask is not consulted here.
 *
 * Returns 0 on success, -1 if match->indices is already full when
 * another match is found.
 */
static int
pt_search(const struct pt_y * const restrict y,
	  char * const restrict * const restrict strings,
	  const unsigned char * restrict subject, size_t len,
	  struct match_data * const restrict match)
{
	size_t end;

	if (y == NULL)
		return (0);
	CHECK_OBJ(y, PT_Y_MAGIC);

	end = y->off + y->len;
	if (end > len)
		return (0);
	if (y->len > 0
	    && memcmp(subject + y->off, strings[y->idx] + y->off, y->len) != 0)
		return (0);

	if (strings[y->idx][end] == '\0') {
		/* A string ends at this node, so it is a prefix. */
		if (match->n == match->limit)
			return (-1);
		match->indices[match->n++] = y->idx;
		if (y->idx < match->min)
			match->min = y->idx;
		if (y->idx > match->max)
			match->max = y->idx;
		if (end == len) {
			/* Same length as the subject: exact match. */
			match->exact = y->idx;
			return (0);
		}
	}

	for (unsigned side = 0; side < 2; side++)
		if (pt_search(y->leaf[side], strings, subject, len, match)
		    != 0)
			return (-1);
	return (0);
}
/*
 * Find all strings in the trie that are prefixes of subject.
 *
 * match must be a valid match_data object with a non-NULL indices array
 * and a non-zero limit. Its result fields are reset before the search:
 * n to 0, min and exact to UINT_MAX, max to 0.
 *
 * Returns the result of pt_search(): 0 on success, -1 if more than
 * match->limit prefixes were found.
 */
int
PT_Prefixes(const struct pt_y * const restrict root,
	    char * const restrict * const restrict strings,
	    const char * const restrict subject,
	    struct match_data * const restrict match)
{
	CHECK_OBJ_NOTNULL(match, MATCH_DATA_MAGIC);
	AN(match->indices);
	AN(match->limit);
	AN(strings);
	AN(subject);

	match->exact = UINT_MAX;
	match->min = UINT_MAX;
	match->max = 0;
	match->n = 0;

	return (pt_search(root, strings, (unsigned char *)subject,
			  strlen(subject), match));
}
/*
 * Recursively free the subtree rooted at y: both children first, then
 * the node itself. A NULL y is a no-op.
 */
void
PT_Free(struct pt_y *y)
{
	if (y != NULL) {
		CHECK_OBJ(y, PT_Y_MAGIC);
		for (unsigned side = 0; side < 2; side++)
			PT_Free(y->leaf[side]);
		FREE_OBJ(y);
	}
}
/*
 * Append a textual description of node y to sb, then recurse into its
 * non-NULL children (leaf[0] first).
 *
 * Pointer arguments to %p are cast to void *, as the conversion requires;
 * the original passed struct pt_y pointers directly.
 */
static void
pt_print_tree(struct pt_y *y, struct vsb *sb, char **strings)
{
	CHECK_OBJ_NOTNULL(y, PT_Y_MAGIC);
	CHECK_OBJ_NOTNULL(sb, VSB_MAGIC);

	VSB_printf(sb, "node = %p\n", (void *)y);
	VSB_printf(sb, "leaf[0] = %p\n", (void *)y->leaf[0]);
	VSB_printf(sb, "leaf[1] = %p\n", (void *)y->leaf[1]);
	VSB_printf(sb, "idx = %u\n", y->idx);
	VSB_printf(sb, "off = %u\n", y->off);
	VSB_printf(sb, "len = %u\n", y->len);
	AN(strings[y->idx]);
	VSB_printf(sb, "strings[idx] = %s\n", strings[y->idx]);
	/* The part of the string before this node's span ... */
	VSB_printf(sb, "strings[idx][0]..[off] = %.*s\n", y->off,
		   strings[y->idx]);
	/* ... and the span itself. */
	VSB_printf(sb, "strings[idx][off]..[off+len] = %.*s\n", y->len,
		   strings[y->idx] + y->off);
	VSB_printf(sb, "bitmask = 0x%02x\n\n", y->bitmask);

	if (y->leaf[0] != NULL)
		pt_print_tree(y->leaf[0], sb, strings);
	if (y->leaf[1] != NULL)
		pt_print_tree(y->leaf[1], sb, strings);
}
/*
 * Return a newly created, finished vsb holding a textual dump of the
 * trie rooted at root. strings must be non-NULL when root is non-NULL.
 * The vsb is handed to the caller.
 *
 * The result of VSB_new_auto() is asserted to be non-NULL before use;
 * the original passed it on unchecked. The %p argument is cast to
 * void *, as the conversion requires.
 */
struct vsb *
PT_Dump(struct pt_y *root, char **strings)
{
	struct vsb *sb = VSB_new_auto();

	AN(sb);
	VSB_printf(sb, "root = %p\n\n", (void *)root);
	if (root != NULL) {
		AN(strings);
		pt_print_tree(root, sb, strings);
	}
	VSB_finish(sb);
	return (sb);
}
/*
 * Recursive worker for PT_Stats(): account for the subtree rooted at y.
 * depth is the depth of y's parent (0 for the root); this node is
 * counted at depth + 1.
 *
 * Every visited node increments stats->nodes. A node at which a string
 * ends updates the minimum, maximum and running mean of the terminating
 * depth and increments stats->terms. A node without children increments
 * stats->leaves.
 *
 * NOTE(review): this helper is not declared in patricia.h but has
 * external linkage, unlike the other helpers in this file -- confirm
 * whether it was meant to be static.
 */
void
pt_stats(const struct pt_y * const restrict y,
	 char * const restrict * const restrict strings,
	 struct pt_stats * const restrict stats, unsigned depth)
{
	if (y == NULL)
		return;
	CHECK_OBJ(y, PT_Y_MAGIC);

	depth += 1;
	stats->nodes += 1;

	if (strings[y->idx][y->off + y->len] == '\0') {
		if (depth < stats->dmin)
			stats->dmin = depth;
		if (depth > stats->dmax)
			stats->dmax = depth;
		/* Incremental mean over the terminating nodes seen so far. */
		stats->davg += (depth - stats->davg) / (stats->terms + 1.);
		stats->terms += 1;
	}

	if (y->leaf[0] != NULL || y->leaf[1] != NULL) {
		pt_stats(y->leaf[0], strings, stats, depth);
		pt_stats(y->leaf[1], strings, stats, depth);
	}
	else
		stats->leaves += 1;
}
/*
 * Compute statistics about the trie rooted at root into stats, which
 * must be a valid pt_stats object. All counters are reset first; dmin
 * starts at UINT64_MAX and keeps that value if no string ends at any
 * node (e.g. for a NULL root). nodesz is the size in bytes of one node.
 */
void
PT_Stats(const struct pt_y * const restrict root,
	 char * const restrict * const restrict strings,
	 struct pt_stats * const restrict stats)
{
	CHECK_OBJ_NOTNULL(stats, PT_STATS_MAGIC);

	stats->nodesz = sizeof(*root);
	stats->nodes = 0;
	stats->leaves = 0;
	stats->terms = 0;
	stats->dmax = 0;
	stats->dmin = UINT64_MAX;
	stats->davg = 0.;

	pt_stats(root, strings, stats, 0);
}
src/patricia.h
deleted
100644 → 0
View file @
7f6a85f9
/*-
* Copyright (c) 2018 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <errno.h>
#include <limits.h>
#include <unistd.h>
#include "vsb.h"
/* Trie node; opaque to users of this interface. */
struct pt_y;

/*
 * In/out object for PT_Prefixes(). The caller provides indices and
 * limit; PT_Prefixes() fills in the remaining fields.
 */
struct match_data {
	unsigned	magic;
#define MATCH_DATA_MAGIC 0x0d9a845e
	unsigned	*indices;	/* caller's array for matched indices */
	unsigned	limit;		/* max number of entries in indices */
	unsigned	n;		/* number of matches recorded */
	unsigned	exact;		/* exact match index, else UINT_MAX */
	unsigned	min;		/* smallest matched index */
	unsigned	max;		/* largest matched index */
};
/*
 * Statistics filled in by PT_Stats(). "Terminating" nodes are those at
 * which one of the strings ends.
 */
struct pt_stats {
	unsigned	magic;
#define PT_STATS_MAGIC 0xf1c1114e
	uint64_t	nodes;		/* total number of nodes */
	uint64_t	leaves;		/* nodes without children */
	uint64_t	terms;		/* terminating nodes */
	uint64_t	nodesz;		/* size of one node in bytes */
	uint64_t	dmin;		/* min depth of a terminating node */
	uint64_t	dmax;		/* max depth of a terminating node */
	double		davg;		/* mean depth of terminating nodes */
};
/* Fill the internal bit table; to be called once before other use. */
void PT_Init(void);

/* Non-zero iff PT_Init() has been called. */
int PT_Inited(void);

/*
 * Insert strings[idx] into the trie at *root (created if *root is NULL).
 * Returns 0 on success, -1 on error with errno set (EINVAL if the string
 * is already present).
 */
int PT_Insert(struct pt_y * * restrict root, unsigned idx,
	      char * const restrict * const restrict strings);

/*
 * Exact-match lookup of subject. Returns the index of the matching
 * string in strings, or UINT_MAX if there is no match.
 */
unsigned PT_Lookup(const struct pt_y * const restrict root,
		   char * const restrict * const restrict strings,
		   const char * const restrict subject);

/*
 * Collect in match all strings that are prefixes of subject. Returns 0
 * on success, -1 if more than match->limit prefixes are found.
 */
int PT_Prefixes(const struct pt_y * const restrict root,
		char * const restrict * const restrict strings,
		const char * const restrict subject,
		struct match_data * const restrict match);

/* Compute node and depth statistics for the trie into stats. */
void PT_Stats(const struct pt_y * const restrict root,
	      char * const restrict * const restrict strings,
	      struct pt_stats * const restrict stats);

/* Free the trie rooted at y; a NULL y is permitted. */
void PT_Free(struct pt_y *y);

/* Return a finished vsb with a textual dump of the trie. */
struct vsb * PT_Dump(struct pt_y *root, char **strings);
src/tests/bench/Makefile.am
View file @
7fc3a4fe
...
...
@@ -4,11 +4,7 @@ AM_CFLAGS = $(VARNISHAPI_CFLAGS) -I$(top_srcdir)/src -Wall -Werror -Wextra \
-std
=
c99
AM_LDFLAGS
=
$(VARNISHAPI_LIBS)
bin_PROGRAMS
=
bench bench_qp bench_ph
bench_SOURCES
=
bench.c
bench_LDADD
=
$(top_builddir)
/src/.libs/patricia.o
bin_PROGRAMS
=
bench_qp bench_ph
bench_qp_SOURCES
=
bench_qp.c
...
...
src/tests/bench/bench.c
deleted
100644 → 0
View file @
7f6a85f9
/*-
* Copyright (c) 2020 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* for strdup() and getline() */
#define _POSIX_C_SOURCE 200809L
/* for srand48() and drand48() */
#define _XOPEN_SOURCE
#include "config.h"
#include <stdio.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include "vdef.h"
#include "vas.h"
#include "miniobj.h"
#include "vsb.h"
#include "patricia.h"
/* Nanoseconds per second. */
#define BILLION (1000 * 1000 * 1000)
/* Default number of benchmark iterations (overridden by -n). */
#define ITERATIONS (1000)
/* Clock used for all timing measurements. */
#define CLOCK (CLOCK_MONOTONIC)

/* Root of the trie under test; starts out NULL as a static object. */
static struct pt_y *origo;
/*
 * Return the time elapsed from *before to *after in nanoseconds.
 */
static uint64_t
tdiff(struct timespec *before, struct timespec *after)
{
	time_t secs = after->tv_sec - before->tv_sec;
	long nsecs = after->tv_nsec - before->tv_nsec;
	uint64_t diff;

	if (nsecs < 0) {
		/* Borrow one second for the nanosecond part. */
		secs -= 1;
		nsecs += BILLION;
	}
	diff = secs * BILLION;
	diff += nsecs;
	return diff;
}
/*
 * Return lo plus drand48() scaled by (hi - lo) and truncated to int.
 */
static int
rnd(int lo, int hi)
{
	const int span = hi - lo;
	const int offset = (int)(drand48() * span);

	return (offset + lo);
}
/*
 * Shuffle the n elements of index in place: each position i is swapped
 * with a position chosen by rnd(i, n).
 *
 * Arrays with fewer than two elements are left alone. This also guards
 * the loop bound: n is a size_t, so n - 1 would wrap around to a huge
 * value for n == 0 and the loop would run off the end of the array.
 *
 * The swap temporary is unsigned, matching the element type; the
 * original stored the element in an int.
 */
static inline void
shuffle(unsigned *index, size_t n)
{
	if (n < 2)
		return;

	for (unsigned i = 0; i < n - 1; i++) {
		int j = rnd(i, n);
		unsigned temp = index[i];

		index[i] = index[j];
		index[j] = temp;
	}
}
/* atexit() handler: release the trie under test. */
static void
free_on_exit(void)
{
	struct pt_y *root = origo;

	PT_Free(root);
}
/*
 * Print the usage message to stderr, with argv as the program name, and
 * exit with the given status. Does not return.
 */
void
usage(const char *argv, int status)
{
	fprintf(stderr,
		"Usage: %s [-hs] [-c csvfile] [-d dumpfile] [-i inputfile]\n",
		argv);
	fputs(" [-m m|p] [-n iterations] [file]\n", stderr);
	exit(status);
}
int
main
(
int
argc
,
char
*
argv
[])
{
FILE
*
stringsf
=
stdin
,
*
csv
=
NULL
;
char
**
strings
=
NULL
,
**
inputs
=
NULL
,
*
line
,
*
inputf
=
NULL
,
*
csvf
=
NULL
,
*
dumpf
=
NULL
;
size_t
lineln
=
LINE_MAX
;
ssize_t
readln
;
unsigned
n
=
0
,
ninputs
=
0
;
struct
timespec
before
,
after
,
start
,
finish
;
uint64_t
ns
=
0
,
iters
,
matches
,
exacts
,
misses
;
struct
pt_stats
stats
=
{
.
magic
=
PT_STATS_MAGIC
};
int
opt
,
do_shuf
=
0
,
do_iters
=
ITERATIONS
,
do_match
=
1
,
do_prefix
=
1
;
struct
rusage
rusage
;
while
((
opt
=
getopt
(
argc
,
argv
,
"hsc:d:i:m:n:"
))
!=
-
1
)
{
switch
(
opt
)
{
case
'c'
:
csvf
=
optarg
;
break
;
case
'd'
:
dumpf
=
optarg
;
break
;
case
'h'
:
usage
(
argv
[
0
],
EXIT_SUCCESS
);
break
;
case
'i'
:
inputf
=
optarg
;
break
;
case
'm'
:
if
(
strcmp
(
optarg
,
"m"
)
==
0
)
do_prefix
=
0
;
else
if
(
strcmp
(
optarg
,
"p"
)
==
0
)
do_match
=
0
;
else
usage
(
argv
[
0
],
EXIT_FAILURE
);
break
;
case
'n'
:
do_iters
=
atoi
(
optarg
);
break
;
case
's'
:
do_shuf
=
1
;
break
;
default:
usage
(
argv
[
0
],
EXIT_FAILURE
);
}
}
if
(
argc
-
optind
>
1
)
usage
(
argv
[
0
],
EXIT_FAILURE
);
if
(
argc
==
optind
+
1
&&
strcmp
(
argv
[
optind
],
"-"
)
!=
0
)
{
errno
=
0
;
if
((
stringsf
=
fopen
(
argv
[
optind
],
"r"
))
==
NULL
)
{
fprintf
(
stderr
,
"Cannot open %s: %s
\n
"
,
argv
[
optind
],
strerror
(
errno
));
exit
(
EXIT_FAILURE
);
}
}
line
=
malloc
(
lineln
);
AN
(
line
);
errno
=
0
;
while
((
readln
=
getline
(
&
line
,
&
lineln
,
stringsf
))
!=
-
1
)
{
if
(
readln
-
1
>
USHRT_MAX
)
{
fprintf
(
stderr
,
"String length %zd too long (max %d)
\n
"
,
readln
-
1
,
USHRT_MAX
);
exit
(
EXIT_FAILURE
);
}
n
++
;
strings
=
realloc
(
strings
,
n
*
sizeof
(
char
*
));
AN
(
strings
);
line
[
readln
-
1
]
=
'\0'
;
strings
[
n
-
1
]
=
strdup
(
line
);
AN
(
strings
[
n
-
1
]);
}
if
(
ferror
(
stringsf
))
{
fprintf
(
stderr
,
"Error reading %s: %s"
,
stringsf
==
stdin
?
"stdin"
:
argv
[
optind
],
strerror
(
errno
));
exit
(
EXIT_FAILURE
);
}
errno
=
0
;
if
(
stringsf
!=
stdin
&&
fclose
(
stringsf
)
!=
0
)
{
fprintf
(
stderr
,
"Error closing %s: %s
\n
"
,
argv
[
optind
],
strerror
(
errno
));
exit
(
EXIT_FAILURE
);
}
printf
(
"%u strings read
\n
"
,
n
);
if
(
n
==
0
)
exit
(
EXIT_SUCCESS
);
if
(
inputf
!=
NULL
)
{
FILE
*
inf
;
errno
=
0
;
if
((
inf
=
fopen
(
inputf
,
"r"
))
==
NULL
)
{
fprintf
(
stderr
,
"Cannot open %s: %s
\n
"
,
inputf
,
strerror
(
errno
));
exit
(
EXIT_FAILURE
);
}
while
((
readln
=
getline
(
&
line
,
&
lineln
,
inf
))
!=
-
1
)
{
if
(
readln
-
1
>
USHRT_MAX
)
{
fprintf
(
stderr
,
"%s: string length %zd "
"too long (max %d)
\n
"
,
inputf
,
readln
-
1
,
USHRT_MAX
);
exit
(
EXIT_FAILURE
);
}
ninputs
++
;
inputs
=
realloc
(
inputs
,
ninputs
*
sizeof
(
char
*
));
AN
(
inputs
);
line
[
readln
-
1
]
=
'\0'
;
inputs
[
ninputs
-
1
]
=
strdup
(
line
);
AN
(
inputs
[
ninputs
-
1
]);
}
if
(
ferror
(
inf
))
{
fprintf
(
stderr
,
"Error reading %s: %s"
,
inputf
,
strerror
(
errno
));
exit
(
EXIT_FAILURE
);
}
errno
=
0
;
if
(
fclose
(
inf
)
!=
0
)
{
fprintf
(
stderr
,
"Error closing %s: %s
\n
"
,
inputf
,
strerror
(
errno
));
exit
(
EXIT_FAILURE
);
}
printf
(
"%u input strings read
\n
"
,
ninputs
);
}
if
(
inputf
==
NULL
||
ninputs
==
0
)
{
printf
(
"Using string set as inputs
\n
"
);
inputs
=
strings
;
ninputs
=
n
;
}
free
(
line
);
if
(
csvf
!=
NULL
)
{
errno
=
0
;
if
((
csv
=
fopen
(
csvf
,
"w"
))
==
NULL
)
{
fprintf
(
stderr
,
"Cannot open %s: %s
\n
"
,
csvf
,
strerror
(
errno
));
exit
(
EXIT_FAILURE
);
}
fprintf
(
csv
,
"type,matches,exact,t
\n
"
);
}
AZ
(
clock_gettime
(
CLOCK_REALTIME
,
&
before
));
srand48
(
before
.
tv_nsec
);
AZ
(
clock_getres
(
CLOCK
,
&
before
));
printf
(
"Clock resolution %ld ns
\n
"
,
before
.
tv_sec
*
BILLION
+
before
.
tv_nsec
);
printf
(
"
\n
Initializing ...
\n
"
);
AZ
(
PT_Inited
());
PT_Init
();
AN
(
PT_Inited
());
printf
(
"Building trie ...
\n
"
);
for
(
unsigned
i
=
0
;
i
<
n
;
i
++
)
{
int
ret
;
errno
=
0
;
(
void
)
clock_gettime
(
CLOCK
,
&
before
);
ret
=
PT_Insert
(
&
origo
,
i
,
strings
);
(
void
)
clock_gettime
(
CLOCK
,
&
after
);
if
(
ret
!=
0
)
{
fprintf
(
stderr
,
"PT_Insert() failed: %s
\n
"
,
strerror
(
errno
));
exit
(
EXIT_FAILURE
);
}
ns
+=
tdiff
(
&
before
,
&
after
);
}
AZ
(
atexit
(
free_on_exit
));
printf
(
"... complete.
\n
"
);
printf
(
"%u strings inserted in %.9f s, mean %lu ns/insert
\n
"
,
n
,
ns
*
1e-9
,
ns
/
n
);
if
(
dumpf
!=
NULL
)
{
FILE
*
df
;
struct
vsb
*
vsb
;
printf
(
"
\n
Dumping trie to %s ...
\n
"
,
dumpf
);
vsb
=
PT_Dump
(
origo
,
strings
);
CHECK_OBJ_NOTNULL
(
vsb
,
VSB_MAGIC
);
errno
=
0
;
if
((
df
=
fopen
(
dumpf
,
"w"
))
==
NULL
)
{
fprintf
(
stderr
,
"Cannot open %s: %s
\n
"
,
dumpf
,
strerror
(
errno
));
exit
(
EXIT_FAILURE
);
}
if
(
fwrite
(
VSB_data
(
vsb
),
1
,
VSB_len
(
vsb
),
df
)
!=
(
unsigned
)
VSB_len
(
vsb
))
{
fprintf
(
stderr
,
"Error writing to %s: %s
\n
"
,
dumpf
,
strerror
(
errno
));
exit
(
EXIT_FAILURE
);
}
VSB_destroy
(
&
vsb
);
if
(
fclose
(
df
)
!=
0
)
{
fprintf
(
stderr
,
"Error closing %s: %s
\n
"
,
dumpf
,
strerror
(
errno
));
exit
(
EXIT_FAILURE
);
}
printf
(
"... done.
\n
"
);
}
printf
(
"
\n
Getting stats ...
\n
"
);
(
void
)
clock_gettime
(
CLOCK
,
&
before
);
PT_Stats
(
origo
,
strings
,
&
stats
);
(
void
)
clock_gettime
(
CLOCK
,
&
after
);
printf
(
"Stats computed in %lu ns
\n
"
,
tdiff
(
&
before
,
&
after
));
printf
(
"%lu nodes
\n
"
,
stats
.
nodes
);
printf
(
"%lu leaves
\n
"
,
stats
.
leaves
);
printf
(
"%lu terminal nodes
\n
"
,
stats
.
terms
);
printf
(
"%lu bytes node size
\n
"
,
stats
.
nodesz
);
printf
(
"%lu min terminating node depth
\n
"
,
stats
.
dmin
);
printf
(
"%lu max terminating node depth
\n
"
,
stats
.
dmax
);
printf
(
"%.1f mean terminating node depth
\n
"
,
stats
.
davg
);
if
(
do_iters
==
0
)
exit
(
EXIT_SUCCESS
);
unsigned
index
[
ninputs
];
for
(
unsigned
i
=
0
;
i
<
ninputs
;
i
++
)
index
[
i
]
=
i
;
printf
(
"
\n
Benchmarking with %d iterations of %u input strings
\n
"
,
do_iters
,
ninputs
);
if
(
do_shuf
)
printf
(
"Shuffling inputs on each iteration
\n
"
);
if
(
!
do_match
)
goto
prefix
;
printf
(
"
\n
Benchmarking matches ...
\n
"
);
ns
=
0
;
iters
=
0
;
matches
=
0
;
(
void
)
clock_gettime
(
CLOCK
,
&
start
);
for
(
int
iter
=
0
;
iter
<
do_iters
;
iter
++
)
{
if
(
do_shuf
)
shuffle
(
index
,
ninputs
);
for
(
unsigned
i
=
0
;
i
<
ninputs
;
i
++
)
{
unsigned
idx
,
stridx
=
index
[
i
];
uint64_t
diff
;
(
void
)
clock_gettime
(
CLOCK
,
&
before
);
idx
=
PT_Lookup
(
origo
,
strings
,
inputs
[
stridx
]);
(
void
)
clock_gettime
(
CLOCK
,
&
after
);
if
(
idx
!=
UINT_MAX
)
{
AZ
(
strcmp
(
strings
[
idx
],
inputs
[
stridx
]));
matches
++
;
}
diff
=
tdiff
(
&
before
,
&
after
);
ns
+=
diff
;
iters
++
;
if
(
csv
!=
NULL
)
{
if
(
idx
!=
UINT_MAX
)
fprintf
(
csv
,
"match,1,1,%lu
\n
"
,
diff
);
else
fprintf
(
csv
,
"match,0,0,%lu
\n
"
,
diff
);
}
}
}
assert
(
matches
<=
iters
);
printf
(
"... complete.
\n
"
);
printf
(
"%ld match operations in %.09f s, mean %lu ns/op
\n
"
,
iters
,
ns
*
1e-9
,
ns
/
iters
);
printf
(
"%ld matches, %ld misses
\n
"
,
matches
,
iters
-
matches
);
prefix:
if
(
!
do_prefix
)
goto
finish
;
printf
(
"
\n
Benchmarking prefix matches ...
\n
"
);
unsigned
*
indices
=
malloc
(
ninputs
*
sizeof
(
*
indices
));
AN
(
indices
);
struct
match_data
match
=
{
.
magic
=
MATCH_DATA_MAGIC
,
.
indices
=
indices
,
.
limit
=
ninputs
,
};
ns
=
0
;
iters
=
0
;
matches
=
0
;
misses
=
0
;
exacts
=
0
;
for
(
int
iter
=
0
;
iter
<
do_iters
;
iter
++
)
{
if
(
do_shuf
)
shuffle
(
index
,
ninputs
);
for
(
unsigned
i
=
0
;
i
<
ninputs
;
i
++
)
{
unsigned
stridx
=
index
[
i
];
int
ret
;
uint64_t
diff
;
(
void
)
clock_gettime
(
CLOCK
,
&
before
);
ret
=
PT_Prefixes
(
origo
,
strings
,
inputs
[
stridx
],
&
match
);
(
void
)
clock_gettime
(
CLOCK
,
&
after
);
AZ
(
ret
);
if
(
match
.
n
>
0
)
{
assert
(
match
.
min
<=
match
.
max
);
matches
+=
match
.
n
;
}
else
misses
++
;
for
(
unsigned
j
=
0
;
j
<
match
.
n
;
j
++
)
{
assert
(
match
.
indices
[
j
]
>=
match
.
min
);
assert
(
match
.
indices
[
j
]
<=
match
.
max
);
AN
(
strstr
(
inputs
[
stridx
],
strings
[
match
.
indices
[
j
]]));
}
if
(
match
.
exact
!=
UINT_MAX
)
{
AZ
(
strcmp
(
inputs
[
stridx
],
strings
[
match
.
exact
]));
exacts
++
;
}
diff
=
tdiff
(
&
before
,
&
after
);
ns
+=
diff
;
iters
++
;
if
(
csv
!=
NULL
)
{
if
(
match
.
n
>
0
)
fprintf
(
csv
,
"prefix,%u,%d,%lu
\n
"
,
match
.
n
,
match
.
exact
!=
UINT_MAX
,
diff
);
else
fprintf
(
csv
,
"prefix,0,0,%lu
\n
"
,
diff
);
}
}
}
(
void
)
clock_gettime
(
CLOCK
,
&
finish
);
AZ
(
getrusage
(
RUSAGE_SELF
,
&
rusage
));
assert
(
matches
>=
exacts
);
printf
(
"... complete.
\n
"
);
printf
(
"%ld prefix match operations in %.09f s, mean %lu ns/op
\n
"
,
iters
,
ns
*
1e-9
,
ns
/
iters
);
printf
(
"%ld prefixes found, %ld exact matches, %ld misses
\n
"
,
matches
,
exacts
,
misses
);
finish:
if
(
csv
!=
NULL
&&
fclose
(
csv
)
!=
0
)
{
fprintf
(
stderr
,
"Error closing %s: %s
\n
"
,
csvf
,
strerror
(
errno
));
exit
(
EXIT_FAILURE
);
}
ns
=
tdiff
(
&
start
,
&
finish
);
printf
(
"
\n
Benchmark wall clock time %.09f s
\n
"
,
ns
*
1e-9
);
AZ
(
getrusage
(
RUSAGE_SELF
,
&
rusage
));
printf
(
"user %.06f s, sys %.06f s, vcsw %ld, ivcsw %ld
\n
"
,
rusage
.
ru_utime
.
tv_sec
+
1e-6
*
rusage
.
ru_utime
.
tv_usec
,
rusage
.
ru_stime
.
tv_sec
+
1e-6
*
rusage
.
ru_stime
.
tv_usec
,
rusage
.
ru_nvcsw
,
rusage
.
ru_nivcsw
);
exit
(
EXIT_SUCCESS
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment