Commit d7f2fb35 authored by Geoff Simmons

Add a function to efficiently get WS-separated fields from a byte slice.

Moving code to extract data from byte slices into bytesconv.go.
parent 628949aa
/*-
* Copyright (c) 2018 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
package log
// Overflow guards consulted by atoUint32 while accumulating digits.
const (
	// cutoff is the largest value that can still be multiplied by 10
	// without exceeding the maximum uint32.
	cutoff = ^uint32(0) / 10

	// rest is the largest ASCII digit that may be appended once the
	// accumulated value equals cutoff: '0' plus the last decimal
	// digit of the maximum uint32.
	rest = byte('0') + byte(^uint32(0)%10)
)
// wsTable is indexed by byte value; an entry is true iff that byte is
// one of the ASCII whitespace characters ' ', '\t', '\n' or '\r'.
var wsTable = wsTbl()

// wsTbl builds the lookup table assigned to wsTable.
func wsTbl() [256]bool {
	return [256]bool{
		' ':  true,
		'\t': true,
		'\n': true,
		'\r': true,
	}
}
// atoUint32 parses a byte slice made up solely of ASCII decimal digits
// as a uint32, working on the bytes directly.
//
// The boolean result is false, and the value 0, if the slice is empty,
// contains a byte outside '0'..'9', or denotes a number too large for
// a uint32.
func atoUint32(bytes []byte) (uint32, bool) {
	if len(bytes) == 0 {
		return 0, false
	}
	var n uint32
	for i := 0; i < len(bytes); i++ {
		c := bytes[i]
		switch {
		case n > cutoff:
			// Multiplying by 10 alone would already overflow.
			return 0, false
		case n == cutoff && c > rest:
			// n*10 still fits, but adding this digit would not.
			return 0, false
		case c < '0' || c > '9':
			return 0, false
		}
		n = n*10 + uint32(c-'0')
	}
	return n, true
}
// fieldNDelims locates the Nth whitespace-separated field of a byte
// slice by scanning it in place. Fields are numbered from 0, and
// whitespace before the first field or after the last one is ignored.
// The two int results delimit the field as a half-open range:
//
//	min, max, ok := fieldNDelims(bytes, 1)
//	if !ok {
//		// ... error handling
//	}
//	fld1 := bytes[min:max]
//	// fld1 now contains the region of field 1.
//
// Whitespace is the ASCII set ' ', '\t', '\n', '\r'.
//
// The boolean result is false, and both ints are 0, if field is
// negative, if the slice has no field with that index, or if the slice
// is empty (which includes the nil slice) or holds only whitespace.
//
// A slice without any separating whitespace has exactly one field,
// number 0, spanning the whole whitespace-trimmed slice.
func fieldNDelims(bytes []byte, field int) (int, int, bool) {
	if field < 0 {
		return 0, 0, false
	}
	n := len(bytes)
	var start, end int
	for idx := 0; idx <= field; idx++ {
		// Skip the whitespace in front of the next field.
		start = end
		for start < n && wsTable[bytes[start]] {
			start++
		}
		if start == n {
			// Input exhausted before the requested field began.
			return 0, 0, false
		}
		// Advance to the first whitespace byte after the field, or
		// to the end of the slice.
		end = start
		for end < n && !wsTable[bytes[end]] {
			end++
		}
	}
	return start, end, true
}
/*-
* Copyright (c) 2018 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
package log
import (
"bytes"
"strconv"
"testing"
)
// atoUint32Exp bundles the two results of atoUint32 so that expected
// and actual outcomes can be compared and printed as one value.
type atoUint32Exp struct {
	val uint32 // parsed value
	ok  bool   // whether the parse succeeded
}
// TestAtoUint32 runs atoUint32 over a table of inputs and compares
// both results with the expectation. The table covers the empty
// slice, small values, every last digit around the uint32 maximum,
// inputs longer than ten digits, leading zeros, and inputs containing
// bytes that are not decimal digits.
func TestAtoUint32(t *testing.T) {
	expMap := map[string]atoUint32Exp{
		"":           {val: 0, ok: false},
		"0":          {val: 0, ok: true},
		"1":          {val: 1, ok: true},
		"9":          {val: 9, ok: true},
		"10":         {val: 10, ok: true},
		"101":        {val: 101, ok: true},
		"1009":       {val: 1009, ok: true},
		"foo":        {val: 0, ok: false},
		"4294967290": {val: 4294967290, ok: true},
		"4294967291": {val: 4294967291, ok: true},
		"4294967292": {val: 4294967292, ok: true},
		"4294967293": {val: 4294967293, ok: true},
		"4294967294": {val: 4294967294, ok: true},
		"4294967295": {val: 4294967295, ok: true},
		"4294967296": {val: 0, ok: false},
		"4294967297": {val: 0, ok: false},
		"4294967298": {val: 0, ok: false},
		"4294967299": {val: 0, ok: false},

		// Overflow detected before the last digit is reached.
		"4294967300":           {val: 0, ok: false},
		"42949672950":          {val: 0, ok: false},
		"99999999999999999999": {val: 0, ok: false},

		// Leading zeros do not count towards the magnitude.
		"0000000000000042": {val: 42, ok: true},

		// Anything other than a decimal digit fails the parse.
		"12a": {val: 0, ok: false},
		"-1":  {val: 0, ok: false},
		"+1":  {val: 0, ok: false},
		" 1":  {val: 0, ok: false},
		"1 ":  {val: 0, ok: false},
	}
	for str, exp := range expMap {
		val, ok := atoUint32([]byte(str))
		if val != exp.val || ok != exp.ok {
			// %q keeps the empty and whitespace inputs visible.
			t.Errorf("atoUint32(%q) want=%+v got=%+v", str, exp,
				atoUint32Exp{val: val, ok: ok})
		}
	}
}
// BenchmarkAtoUint32 measures atoUint32 on the ten-digit maximum
// uint32 value and reports allocations per operation.
func BenchmarkAtoUint32(b *testing.B) {
	// Named input rather than bytes so that the imported bytes
	// package is not shadowed inside this function.
	input := []byte("4294967295")
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, ok := atoUint32(input); !ok {
			// Fatal ends the benchmark goroutine itself, so no
			// return statement is needed after it.
			b.Fatal("atoUint32() failed to parse")
		}
	}
}
// BenchmarkStrconvAtoi is the standard-library counterpart of
// BenchmarkAtoUint32: it converts the same digits to a string, parses
// them with strconv.Atoi, and reports allocations per operation.
func BenchmarkStrconvAtoi(b *testing.B) {
	// 4294967295 does not fit in a 32-bit int, where Atoi returns a
	// range error; skip on such platforms instead of failing.
	if strconv.IntSize < 64 {
		b.Skip("strconv.Atoi() cannot represent 4294967295 in a 32-bit int")
	}
	// Named input rather than bytes so that the imported bytes
	// package is not shadowed inside this function.
	input := []byte("4294967295")
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, err := strconv.Atoi(string(input)); err != nil {
			// Fatalf ends the benchmark goroutine itself, so no
			// return statement is needed after it.
			b.Fatalf("strconv.Atoi() failed to parse: %v", err)
		}
	}
}
// delimVec is the table that drives TestFieldNDelims. Each key is an
// input handed to fieldNDelims; each entry of its value names a field
// index (fld), whether the lookup is expected to succeed (ok) and, if
// so, the expected contents of that field (exp). Entries that set only
// fld expect failure.
var delimVec = map[string][]struct {
	fld int
	ok  bool
	exp string
}{
	"foo bar baz quux": {
		{fld: 0, ok: true, exp: "foo"},
		{fld: 1, ok: true, exp: "bar"},
		{fld: 2, ok: true, exp: "baz"},
		{fld: 3, ok: true, exp: "quux"},
		{fld: 4},
		{fld: 5},
	},
	" foo \t \n bar\r \t baz quux\n\r\n\n": {
		{fld: 0, ok: true, exp: "foo"},
		{fld: 1, ok: true, exp: "bar"},
		{fld: 2, ok: true, exp: "baz"},
		{fld: 3, ok: true, exp: "quux"},
		{fld: 4},
		{fld: 5},
	},
	"foobarbazquux": {
		{fld: 0, ok: true, exp: "foobarbazquux"},
		{fld: 1},
		{fld: 2},
	},
	" \t\n\r foobarbazquux\r\n\n\n": {
		{fld: 0, ok: true, exp: "foobarbazquux"},
		{fld: 1},
		{fld: 2},
	},
	"foobarbazquux ": {
		{fld: 0, ok: true, exp: "foobarbazquux"},
		{fld: 1},
		{fld: 2},
	},
	" foobarbazquux": {
		{fld: 0, ok: true, exp: "foobarbazquux"},
		{fld: 1},
		{fld: 2},
	},
	"": {
		{fld: 0},
		{fld: 1},
		{fld: 2},
	},
}
// TestFieldNDelims checks fieldNDelims in two parts: three direct
// calls that must fail (index past the last field, negative index,
// nil slice), then every entry of delimVec, comparing the success
// flag and, on success, the bytes between the returned delimiters.
func TestFieldNDelims(t *testing.T) {
	s := []byte("foo bar baz quux")
	if _, _, ok := fieldNDelims(s, 4); ok {
		t.Error("fieldNDelims() did not fail for N out of range")
	}
	if _, _, ok := fieldNDelims(s, -1); ok {
		t.Error("fieldNDelims() did not fail for N < 0")
	}
	if _, _, ok := fieldNDelims(nil, 0); ok {
		t.Error("fieldNDelims() did not fail the nil slice")
	}

	for str, expVec := range delimVec {
		sl := []byte(str)
		for _, exp := range expVec {
			// lo/hi rather than min/max, which would shadow the
			// builtins of the same name.
			lo, hi, ok := fieldNDelims(sl, exp.fld)
			if ok != exp.ok {
				// %q keeps whitespace-only and empty inputs
				// readable in the failure output.
				t.Errorf("fieldNDelims(%q, %d) ok want=%v got=%v",
					str, exp.fld, exp.ok, ok)
				// The delimiters are meaningless once the flag
				// is wrong, so do not slice with them.
				continue
			}
			if !ok {
				continue
			}
			if f := string(sl[lo:hi]); f != exp.exp {
				t.Errorf("fieldNDelims(%q, %d) want=%q got=%q",
					str, exp.fld, exp.exp, f)
			}
		}
	}
}
// BenchmarkFieldNDelims measures fieldNDelims locating the last of
// four space-separated fields and reports allocations per operation.
func BenchmarkFieldNDelims(b *testing.B) {
	// Named input rather than bytes so that the imported bytes
	// package is not shadowed inside this function.
	input := []byte("foo bar baz quux")
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, _, ok := fieldNDelims(input, 3); !ok {
			// Fatal ends the benchmark goroutine itself, so no
			// return statement is needed after it.
			b.Fatal("fieldNDelims() failed to parse")
		}
	}
}
// BenchmarkBytesFields is the standard-library counterpart of
// BenchmarkFieldNDelims: it splits the same input with bytes.Fields
// and reports allocations per operation.
func BenchmarkBytesFields(b *testing.B) {
	input := []byte("foo bar baz quux")
	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_ = bytes.Fields(input)
	}
}
......@@ -62,35 +62,6 @@ var (
}
)
// Overflow guards consulted by atoUint32 while accumulating digits.
const (
	// cutoff is the largest value that can still be multiplied by 10
	// without exceeding the maximum uint32.
	cutoff = ^uint32(0) / 10

	// rest is the largest ASCII digit that may be appended once the
	// accumulated value equals cutoff: '0' plus the last decimal
	// digit of the maximum uint32.
	rest = byte('0') + byte(^uint32(0)%10)
)
// atoUint32 parses a byte slice made up solely of ASCII decimal digits
// as a uint32. It exists because calling
// strconv.Atoi(string([]byte)) causes the string to escape to the
// heap.
//
// The boolean result is false, and the value 0, if the slice is empty,
// contains a byte outside '0'..'9', or denotes a number too large for
// a uint32.
func atoUint32(bytes []byte) (uint32, bool) {
	if len(bytes) == 0 {
		return 0, false
	}
	var n uint32
	for i := 0; i < len(bytes); i++ {
		c := bytes[i]
		switch {
		case n > cutoff:
			// Multiplying by 10 alone would already overflow.
			return 0, false
		case n == cutoff && c > rest:
			// n*10 still fits, but adding this digit would not.
			return 0, false
		case c < '0' || c > '9':
			return 0, false
		}
		n = n*10 + uint32(c-'0')
	}
	return n, true
}
type grpNode struct {
children []uint32
vxid uint32
......
......@@ -30,42 +30,6 @@ package log
import "testing"
// atoUint32Exp bundles the two results of atoUint32 so that expected
// and actual outcomes can be compared and printed as one value.
type atoUint32Exp struct {
	val uint32 // parsed value
	ok  bool   // whether the parse succeeded
}
// TestAtoUint32 runs atoUint32 over a table of inputs and compares
// both results with the expectation. The table covers the empty
// slice, small values, every last digit around the uint32 maximum,
// inputs longer than ten digits, leading zeros, and inputs containing
// bytes that are not decimal digits.
func TestAtoUint32(t *testing.T) {
	expMap := map[string]atoUint32Exp{
		"":           {val: 0, ok: false},
		"0":          {val: 0, ok: true},
		"1":          {val: 1, ok: true},
		"9":          {val: 9, ok: true},
		"10":         {val: 10, ok: true},
		"101":        {val: 101, ok: true},
		"1009":       {val: 1009, ok: true},
		"foo":        {val: 0, ok: false},
		"4294967290": {val: 4294967290, ok: true},
		"4294967291": {val: 4294967291, ok: true},
		"4294967292": {val: 4294967292, ok: true},
		"4294967293": {val: 4294967293, ok: true},
		"4294967294": {val: 4294967294, ok: true},
		"4294967295": {val: 4294967295, ok: true},
		"4294967296": {val: 0, ok: false},
		"4294967297": {val: 0, ok: false},
		"4294967298": {val: 0, ok: false},
		"4294967299": {val: 0, ok: false},

		// Overflow detected before the last digit is reached.
		"4294967300":           {val: 0, ok: false},
		"42949672950":          {val: 0, ok: false},
		"99999999999999999999": {val: 0, ok: false},

		// Leading zeros do not count towards the magnitude.
		"0000000000000042": {val: 42, ok: true},

		// Anything other than a decimal digit fails the parse.
		"12a": {val: 0, ok: false},
		"-1":  {val: 0, ok: false},
		"+1":  {val: 0, ok: false},
		" 1":  {val: 0, ok: false},
		"1 ":  {val: 0, ok: false},
	}
	for str, exp := range expMap {
		val, ok := atoUint32([]byte(str))
		if val != exp.val || ok != exp.ok {
			// %q keeps the empty and whitespace inputs visible.
			t.Errorf("atoUint32(%q) want=%+v got=%+v", str, exp,
				atoUint32Exp{val: val, ok: ok})
		}
	}
}
func TestNewQuery(t *testing.T) {
l := New()
defer l.Release()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment