Add a parser for float64 values in byte slices that does no allocations.

d17a0cd9 · Geoff Simmons · 87977c21 · d17a0cd9 · d17a0cd9
Commit d17a0cd9 authored Sep 05, 2018 by Geoff Simmons
Hide whitespace changes
Inline Side-by-side

Showing with 203 additions and 0 deletions

bytesconv.go pkg/log/bytesconv.go +93 -0

bytesconv_test.go pkg/log/bytesconv_test.go +110 -0

No files found.
--- a/pkg/log/bytesconv.go
+++ b/pkg/log/bytesconv.go
@@ -28,6 +28,8 @@

 package log

+import "math"
+
 const (
 	cutoffU32 = ^uint32(0) / 10
 	restU32   = byte(^uint32(0)-cutoffU32*10) + byte('0')
@@ -195,6 +197,97 @@ func parseInt64(bytes []byte) (int64, bool) {
 	return parseDec(bytes[i:], pos)
 }

+// parseE parses the exponent part of a number in scientific notation.
+// bytes[i] must be the 'e' or 'E' that introduces the exponent. It may
+// be followed by an optional '+' or '-' sign, and then at least one
+// decimal digit is required.
+//
+// On success, returns the signed exponent value, the index of the first
+// byte after the exponent digits, and true. If no digit follows the 'e'
+// or the sign (including when the slice ends there), returns 0, 0 and
+// false.
+func parseE(bytes []byte, i int) (float64, int, bool) {
+	ee, es := float64(0.), float64(1.)
+	l := len(bytes)
+
+	// Step over the 'e' or 'E' itself.
+	i++
+	if i < l && (bytes[i] == byte('+') || bytes[i] == byte('-')) {
+		if bytes[i] == byte('-') {
+			es = -1.
+		}
+		i++
+	}
+	// At least one digit must follow. Checking the index first also
+	// rejects input that ends right after the 'e' or the sign, rather
+	// than reading past the end of the slice.
+	if i >= l || bytes[i] < byte('0') || bytes[i] > byte('9') {
+		return 0., 0, false
+	}
+	for ; i < l; i++ {
+		b := bytes[i]
+		if b < byte('0') || b > byte('9') {
+			break
+		}
+		ee = ee*10. + float64(b-byte('0'))
+	}
+	return ee * es, i, true
+}
+
+// Parse a double precision float from a byte slice. Adapted from Varnish
+// VNUMpfx() in vnum.c.
+//
+// Handles scientific notation, but not special values like NaN, Inf or
+// negative zero (-0. is intepreted as just 0.).
+//
+// Leading whitespace is ignored. Whitespace immediately after the digits
+// is permitted (and then anything else is ignored).
+//
+// Results may not be accurate up to the limits of DBL_MIN and DBL_MAX
+// (or I am unable to test for that). It is accurate up to
+// +-(1 << DBL_MANT_DIG), and certainly for typical Varnish log usage
+// (mainly, to parse Timestamp entries).
+func parseFloat64(bytes []byte) (float64, bool) {
+	m, ne := float64(0.), float64(0.)
+	ms, e := float64(1.), float64(1.)
+	l := len(bytes)
+	i := 0
+
+	for ; i < l && wsTable[bytes[i]]; i++ {
+	}
+	if i == l {
+		return 0., false
+	}
+	if bytes[i] == byte('+') || bytes[i] == byte('-') {
+		if bytes[i] == byte('-') {
+			ms = -1.
+		}
+		i++
+		if i == l {
+			return 0., false
+		}
+	}
+	for ; i < l; i++ {
+		b := bytes[i]
+		if b >= byte('0') && b <= byte('9') {
+			m = m*10. + float64(b-byte('0'))
+			e = ne
+			if e != 0.0 {
+				ne = e - 1.
+			}
+			continue
+		}
+		if b == byte('.') && ne == 0. {
+			ne = -1.0
+			continue
+		}
+		break
+	}
+	if e > 0. {
+		return 0., false
+	}
+	if i < l && (bytes[i] == byte('e') || bytes[i] == byte('E')) {
+		exp, idx, ok := parseE(bytes, i)
+		if !ok {
+			return 0., false
+		}
+		e += exp
+		i = idx
+	}
+	if i < l && !wsTable[bytes[i]] {
+		return 0., false
+	}
+	return ms * m * math.Pow(10., e), true
+}
+
 // Return the byte slice delimiters of the Nth whitespace-separated
 // field in the given byte slice, counting fields from 0 and ignoring
 // leading and trailing whitespace:

--- a/pkg/log/bytesconv_test.go
+++ b/pkg/log/bytesconv_test.go
@@ -575,3 +575,113 @@ func BenchmarkStrconvParseInt(b *testing.B) {
 		_, _ = strconv.ParseInt(string(s), 0, 64)
 	}
 }
+
+func TestParseFloat64(t *testing.T) {
+	expMap := map[string]struct {
+		val float64
+		ok  bool
+	}{
+		"":      {val: 0., ok: false},
+		"0":     {val: 0., ok: true},
+		"1":     {val: 1., ok: true},
+		"0.":    {val: 0., ok: true},
+		"1.":    {val: 1., ok: true},
+		"0.0":   {val: 0., ok: true},
+		"1.0":   {val: 1., ok: true},
+		" 1.0":  {val: 1., ok: true},
+		"1.0 ":  {val: 1., ok: true},
+		" 1.0 ": {val: 1., ok: true},
+		"foo":   {val: 0., ok: false},
+		" foo":  {val: 0., ok: false},
+
+		// tests from Varnish vnum.c
+		"12":        {val: 12., ok: true},
+		"12.":       {val: 12., ok: true},
+		"12.3":      {val: 12.3, ok: true},
+		"12.34":     {val: 12.34, ok: true},
+		"12.34e-3":  {val: 12.34e-3, ok: true},
+		"12.34e3":   {val: 12.34e3, ok: true},
+		"12.34e+3":  {val: 12.34e3, ok: true},
+		"+12.34e-3": {val: 12.34e-3, ok: true},
+		"-12.34e3":  {val: -12.34e3, ok: true},
+		".":         {val: 0., ok: false},
+		".12.":      {val: 0., ok: false},
+		"12..":      {val: 0., ok: false},
+		"12.,":      {val: 0., ok: false},
+		"12e,":      {val: 0., ok: false},
+		"12e+,":     {val: 0., ok: false},
+		"12ee,":     {val: 0., ok: false},
+		"1..2":      {val: 0., ok: false},
+		"A":         {val: 0., ok: false},
+		"1A":        {val: 0., ok: false},
+		"e-3":       {val: 0., ok: false},
+
+		"-0":     {val: 0., ok: true},
+		"-1":     {val: -1., ok: true},
+		"-0.":    {val: 0., ok: true},
+		"-1.":    {val: -1., ok: true},
+		"-0.0":   {val: 0., ok: true},
+		"-1.0":   {val: -1., ok: true},
+		" -1.0":  {val: -1., ok: true},
+		"-1.0 ":  {val: -1., ok: true},
+		" -1.0 ": {val: -1., ok: true},
+		"-foo":   {val: 0., ok: false},
+		" -foo":  {val: 0., ok: false},
+
+		// Varnish log timestamps: epoch and duration in us.
+		"1536134634.139056": {val: 1536134634.139056, ok: true},
+		"0.000001":          {val: 0.000001, ok: true},
+
+		// DBL_EPSILON with one fewer digit before 'e'
+		"2.220446049250313e-016":
+		{val: 2.220446049250313e-016, ok: true},
+		// +-2^53
+		"9.007199254740992e15":  {val: 9.007199254740992e15, ok: true},
+		"-9.007199254740992e15": {val: -9.007199254740992e15, ok: true},
+	}
+
+	for str, exp := range expMap {
+		val, ok := parseFloat64([]byte(str))
+		if ok != exp.ok || val != exp.val {
+			t.Errorf("parseFloat64(%s) want=%v got={%v %v}", str,
+				exp, val, ok)
+		}
+	}
+}
+
+// fltBenchVec lists the named inputs shared by the float parsing
+// benchmarks.
+var fltBenchVec = []struct {
+	name  string
+	bytes []byte
+}{
+	{name: "pow2mantissa", bytes: []byte("9.007199254740992e15")},
+	{name: "epochMicro", bytes: []byte("1536134634.139056")},
+	{name: "durationMicro", bytes: []byte("0.000001")},
+}
+
+// BenchmarkParseFloat64 runs parseFloat64 as a sub-benchmark for each
+// input in fltBenchVec, reporting allocations. It fails if an input is
+// rejected.
+func BenchmarkParseFloat64(b *testing.B) {
+	for _, v := range fltBenchVec {
+		b.Run(v.name, func(b *testing.B) {
+			b.ReportAllocs()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				if _, ok := parseFloat64(v.bytes); !ok {
+					// Fatal stops the benchmark goroutine, so no
+					// return is needed after it.
+					b.Fatal("parseFloat64() failed")
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkStrconvParseFloat runs strconv.ParseFloat over the inputs in
+// fltBenchVec as a baseline. Each input is converted from a byte slice
+// to a string inside the timed loop.
+func BenchmarkStrconvParseFloat(b *testing.B) {
+	for _, tc := range fltBenchVec {
+		input := tc.bytes
+		b.Run(tc.name, func(b *testing.B) {
+			b.ReportAllocs()
+			b.ResetTimer()
+			for n := b.N; n > 0; n-- {
+				_, _ = strconv.ParseFloat(string(input), 64)
+			}
+		})
+	}
+}