Commit d17a0cd9 authored by Geoff Simmons

Add a parser for float64 in byte slices, that does no allocations.

parent 87977c21
......@@ -28,6 +28,8 @@
package log
import "math"
const (
cutoffU32 = ^uint32(0) / 10
restU32 = byte(^uint32(0)-cutoffU32*10) + byte('0')
......@@ -195,6 +197,97 @@ func parseInt64(bytes []byte) (int64, bool) {
return parseDec(bytes[i:], pos)
}
// parseE parses the exponent part of a number in scientific notation.
//
// bytes[i] must be the 'e' or 'E' that introduces the exponent. It may
// be followed by a single optional '+' or '-', and must then be followed
// by at least one decimal digit. Parsing stops at the first byte that is
// not a digit.
//
// Returns the signed exponent value, the index of the first byte after
// the exponent digits, and true on success. Returns (0, 0, false) if no
// digit follows the 'e' or the sign, including the case that the slice
// ends at that point.
func parseE(bytes []byte, i int) (float64, int, bool) {
	ee, es := float64(0.), float64(1.)
	l := len(bytes)

	// Step over the 'e' or 'E'.
	i++
	if i < l && (bytes[i] == byte('+') || bytes[i] == byte('-')) {
		if bytes[i] == byte('-') {
			es = -1.
		}
		i++
	}
	// At least one digit is mandatory, with or without a sign. Checking
	// the bound first keeps inputs such as "1e" or "1e+" from indexing
	// past the end of the slice.
	if i >= l || bytes[i] < byte('0') || bytes[i] > byte('9') {
		return 0., 0, false
	}
	for ; i < l; i++ {
		b := bytes[i]
		if b < byte('0') || b > byte('9') {
			break
		}
		ee = ee*10. + float64(b-byte('0'))
	}
	return ee * es, i, true
}
// parseFloat64 parses a double precision float from a byte slice
// without converting it to a string. Adapted from Varnish VNUMpfx() in
// vnum.c.
//
// Accepted input is: optional leading whitespace, an optional '+' or
// '-', decimal digits with at most one '.', and an optional exponent
// introduced by 'e' or 'E'. At least one digit is required. Special
// values like NaN and Inf are not handled, and negative zero is not
// treated specially ("-0." yields a value that compares equal to 0.).
//
// The byte immediately after the number must be whitespace, or the
// number must end the slice; anything after that whitespace byte is
// ignored.
//
// The second return value is false if the input is not a number in the
// form described above.
//
// Note from the original author: results may not be accurate up to the
// limits of DBL_MIN and DBL_MAX, but are accurate up to
// +-(1 << DBL_MANT_DIG), which covers typical Varnish log usage (mainly
// parsing Timestamp entries).
func parseFloat64(bytes []byte) (float64, bool) {
	var mantissa, nextExp float64
	// exp10 stays positive until the first digit has been seen; that is
	// how a number without any digits is detected below.
	sign, exp10 := float64(1.), float64(1.)
	n := len(bytes)

	// Skip leading whitespace.
	pos := 0
	for pos < n && wsTable[bytes[pos]] {
		pos++
	}
	if pos == n {
		return 0., false
	}

	// Optional sign, which must not be the last byte.
	switch bytes[pos] {
	case byte('-'):
		sign = -1.
		fallthrough
	case byte('+'):
		pos++
		if pos == n {
			return 0., false
		}
	}

	// Accumulate all digits into the mantissa as if there were no
	// decimal point, and count the fractional digits as a negative
	// power of ten in exp10.
scan:
	for ; pos < n; pos++ {
		c := bytes[pos]
		switch {
		case c >= byte('0') && c <= byte('9'):
			mantissa = mantissa*10. + float64(c-byte('0'))
			exp10 = nextExp
			if exp10 != 0.0 {
				// Past the decimal point: the next digit
				// is one more decimal place to the right.
				nextExp = exp10 - 1.
			}
		case c == byte('.') && nextExp == 0.:
			// First decimal point.
			nextExp = -1.0
		default:
			break scan
		}
	}
	if exp10 > 0. {
		// No digits were found.
		return 0., false
	}

	if pos < n && (bytes[pos] == byte('e') || bytes[pos] == byte('E')) {
		expVal, next, ok := parseE(bytes, pos)
		if !ok {
			return 0., false
		}
		exp10 += expVal
		pos = next
	}

	// Only whitespace or the end of the slice may follow the number.
	if pos < n && !wsTable[bytes[pos]] {
		return 0., false
	}
	return sign * mantissa * math.Pow(10., exp10), true
}
// Return the byte slice delimiters of the Nth whitespace-separated
// field in the given byte slice, counting fields from 0 and ignoring
// leading and trailing whitespace:
......
......@@ -575,3 +575,113 @@ func BenchmarkStrconvParseInt(b *testing.B) {
_, _ = strconv.ParseInt(string(s), 0, 64)
}
}
// TestParseFloat64 checks parseFloat64 against a table of inputs mapped
// to the expected value and success flag.
func TestParseFloat64(t *testing.T) {
	type result struct {
		val float64
		ok  bool
	}
	cases := map[string]result{
		"":      {0., false},
		"0":     {0., true},
		"1":     {1., true},
		"0.":    {0., true},
		"1.":    {1., true},
		"0.0":   {0., true},
		"1.0":   {1., true},
		" 1.0":  {1., true},
		"1.0 ":  {1., true},
		" 1.0 ": {1., true},
		"foo":   {0., false},
		" foo":  {0., false},

		// Cases taken from the tests in Varnish vnum.c.
		"12":        {12., true},
		"12.":       {12., true},
		"12.3":      {12.3, true},
		"12.34":     {12.34, true},
		"12.34e-3":  {12.34e-3, true},
		"12.34e3":   {12.34e3, true},
		"12.34e+3":  {12.34e3, true},
		"+12.34e-3": {12.34e-3, true},
		"-12.34e3":  {-12.34e3, true},
		".":         {0., false},
		".12.":      {0., false},
		"12..":      {0., false},
		"12.,":      {0., false},
		"12e,":      {0., false},
		"12e+,":     {0., false},
		"12ee,":     {0., false},
		"1..2":      {0., false},
		"A":         {0., false},
		"1A":        {0., false},
		"e-3":       {0., false},

		"-0":     {0., true},
		"-1":     {-1., true},
		"-0.":    {0., true},
		"-1.":    {-1., true},
		"-0.0":   {0., true},
		"-1.0":   {-1., true},
		" -1.0":  {-1., true},
		"-1.0 ":  {-1., true},
		" -1.0 ": {-1., true},
		"-foo":   {0., false},
		" -foo":  {0., false},

		// Varnish log timestamps: epoch and duration in microseconds.
		"1536134634.139056": {1536134634.139056, true},
		"0.000001":          {0.000001, true},

		// DBL_EPSILON with one fewer digit before 'e'.
		"2.220446049250313e-016": {2.220446049250313e-016, true},

		// +-2^53
		"9.007199254740992e15":  {9.007199254740992e15, true},
		"-9.007199254740992e15": {-9.007199254740992e15, true},
	}

	for in, want := range cases {
		got, ok := parseFloat64([]byte(in))
		if ok == want.ok && got == want.val {
			continue
		}
		t.Errorf("parseFloat64(%s) want=%v got={%v %v}", in,
			want, got, ok)
	}
}
// fltBenchVec lists the named inputs shared by the float parsing
// benchmarks, so that parseFloat64 and strconv.ParseFloat are measured
// on the same data.
var fltBenchVec = []struct {
	name  string
	bytes []byte
}{
	{
		name:  "pow2mantissa",
		bytes: []byte("9.007199254740992e15"),
	},
	{
		name:  "epochMicro",
		bytes: []byte("1536134634.139056"),
	},
	{
		name:  "durationMicro",
		bytes: []byte("0.000001"),
	},
}
// BenchmarkParseFloat64 measures parseFloat64 on every input in
// fltBenchVec, as one sub-benchmark per entry with allocation reporting
// enabled. A sub-benchmark fails if parseFloat64 rejects its input.
func BenchmarkParseFloat64(b *testing.B) {
	for _, v := range fltBenchVec {
		b.Run(v.name, func(b *testing.B) {
			b.ReportAllocs()
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				if _, ok := parseFloat64(v.bytes); !ok {
					// Fatal stops this sub-benchmark, so
					// nothing needs to follow it.
					b.Fatal("parseFloat64() failed")
				}
			}
		})
	}
}
// BenchmarkStrconvParseFloat measures strconv.ParseFloat on every input
// in fltBenchVec, as one sub-benchmark per entry with allocation
// reporting enabled. The conversion of the byte slice to a string is
// inside the timed loop.
func BenchmarkStrconvParseFloat(b *testing.B) {
	for idx := range fltBenchVec {
		tc := fltBenchVec[idx]
		run := func(b *testing.B) {
			b.ReportAllocs()
			b.ResetTimer()
			for n := 0; n < b.N; n++ {
				_, _ = strconv.ParseFloat(string(tc.bytes), 64)
			}
		}
		b.Run(tc.name, run)
	}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment