438 lines
11 KiB
Go
438 lines
11 KiB
Go
package huff0
|
|
|
|
import (
|
|
"io/ioutil"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/klauspost/compress/flate"
|
|
)
|
|
|
|
// inputFn produces the raw input bytes for a test case, or an error if they
// could not be obtained.
type inputFn func() ([]byte, error)
|
|
|
|
var testfiles = []struct {
|
|
name string
|
|
fn inputFn
|
|
err1X error
|
|
err4X error
|
|
}{
|
|
// Digits is the digits of the irrational number e. Its decimal representation
|
|
// does not repeat, but there are only 10 possible digits, so it should be
|
|
// reasonably compressible.
|
|
{name: "digits", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/e.txt") }},
|
|
// gettysburg.txt is a small plain text.
|
|
{name: "gettysburg", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/gettysburg.txt") }},
|
|
// Twain is Project Gutenberg's edition of Mark Twain's classic English novel.
|
|
{name: "twain", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt") }},
|
|
// Random bytes
|
|
{name: "random", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/sharnd.out") }, err1X: ErrIncompressible, err4X: ErrIncompressible},
|
|
// Low entropy
|
|
{name: "low-ent.10k", fn: func() ([]byte, error) { return []byte(strings.Repeat("1221", 10000)), nil }},
|
|
// Super Low entropy
|
|
{name: "superlow-ent-10k", fn: func() ([]byte, error) { return []byte(strings.Repeat("1", 10000) + strings.Repeat("2", 500)), nil }},
|
|
// Zero bytes
|
|
{name: "zeroes", fn: func() ([]byte, error) { return make([]byte, 10000), nil }, err1X: ErrUseRLE, err4X: ErrUseRLE},
|
|
{name: "crash1", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/crash1.bin") }, err1X: ErrIncompressible, err4X: ErrIncompressible},
|
|
{name: "crash2", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/crash2.bin") }, err4X: ErrIncompressible},
|
|
{name: "crash3", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/crash3.bin") }, err1X: ErrIncompressible, err4X: ErrIncompressible},
|
|
{name: "endzerobits", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/endzerobits.bin") }, err1X: nil, err4X: ErrIncompressible},
|
|
{name: "endnonzero", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/endnonzero.bin") }, err4X: ErrIncompressible},
|
|
{name: "case1", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/case1.bin") }, err1X: nil},
|
|
{name: "case2", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/case2.bin") }, err1X: nil},
|
|
{name: "case3", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/case3.bin") }, err1X: nil},
|
|
{name: "pngdata.001", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/pngdata.bin") }, err1X: nil},
|
|
{name: "normcount2", fn: func() ([]byte, error) { return ioutil.ReadFile("../testdata/normcount2.bin") }, err1X: nil},
|
|
}
|
|
|
|
type fuzzInput struct {
|
|
name string
|
|
fn inputFn
|
|
}
|
|
|
|
// testfilesExtended holds the inputs discovered under the fuzz corpus
// directory; it is filled in by init.
var testfilesExtended []fuzzInput
|
|
|
|
func init() {
|
|
filepath.Walk("./fuzz/compress/corpus", func(path string, info os.FileInfo, err error) error {
|
|
if info.Size() == 0 || info.IsDir() {
|
|
return nil
|
|
}
|
|
testfilesExtended = append(testfilesExtended, fuzzInput{
|
|
name: filepath.Base(path),
|
|
fn: func() ([]byte, error) {
|
|
return ioutil.ReadFile(path)
|
|
},
|
|
})
|
|
return nil
|
|
})
|
|
}
|
|
|
|
func TestCompress1X(t *testing.T) {
|
|
for _, test := range testfiles {
|
|
t.Run(test.name, func(t *testing.T) {
|
|
var s Scratch
|
|
buf0, err := test.fn()
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if len(buf0) > BlockSizeMax {
|
|
buf0 = buf0[:BlockSizeMax]
|
|
}
|
|
b, re, err := Compress1X(buf0, &s)
|
|
if err != test.err1X {
|
|
t.Errorf("want error %v (%T), got %v (%T)", test.err1X, test.err1X, err, err)
|
|
}
|
|
if err != nil {
|
|
t.Log(test.name, err.Error())
|
|
return
|
|
}
|
|
if b == nil {
|
|
t.Error("got no output")
|
|
return
|
|
}
|
|
min := s.minSize(len(buf0))
|
|
if len(s.OutData) < min {
|
|
t.Errorf("output data length (%d) below shannon limit (%d)", len(s.OutData), min)
|
|
}
|
|
if len(s.OutTable) == 0 {
|
|
t.Error("got no table definition")
|
|
}
|
|
if re {
|
|
t.Error("claimed to have re-used.")
|
|
}
|
|
if len(s.OutData) == 0 {
|
|
t.Error("got no data output")
|
|
}
|
|
t.Logf("%s: %d -> %d bytes (%.2f:1) re:%t (table: %d bytes)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)), re, len(s.OutTable))
|
|
s.Out = nil
|
|
bRe, _, err := Compress1X(b, &s)
|
|
if err == nil {
|
|
t.Log("Could re-compress to", len(bRe))
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestCompress4X(t *testing.T) {
|
|
for _, test := range testfiles {
|
|
t.Run(test.name, func(t *testing.T) {
|
|
var s Scratch
|
|
buf0, err := test.fn()
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if len(buf0) > BlockSizeMax {
|
|
buf0 = buf0[:BlockSizeMax]
|
|
}
|
|
b, re, err := Compress4X(buf0, &s)
|
|
if err != test.err4X {
|
|
t.Errorf("want error %v (%T), got %v (%T)", test.err1X, test.err4X, err, err)
|
|
}
|
|
if err != nil {
|
|
t.Log(test.name, err.Error())
|
|
return
|
|
}
|
|
if b == nil {
|
|
t.Error("got no output")
|
|
return
|
|
}
|
|
if len(s.OutTable) == 0 {
|
|
t.Error("got no table definition")
|
|
}
|
|
if re {
|
|
t.Error("claimed to have re-used.")
|
|
}
|
|
if len(s.OutData) == 0 {
|
|
t.Error("got no data output")
|
|
}
|
|
|
|
t.Logf("%s: %d -> %d bytes (%.2f:1) %t (table: %d bytes)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)), re, len(s.OutTable))
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestCompress1XReuse(t *testing.T) {
|
|
for _, test := range testfiles {
|
|
t.Run(test.name, func(t *testing.T) {
|
|
var s Scratch
|
|
buf0, err := test.fn()
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if len(buf0) > BlockSizeMax {
|
|
buf0 = buf0[:BlockSizeMax]
|
|
}
|
|
b, re, err := Compress1X(buf0, &s)
|
|
if err != test.err1X {
|
|
t.Errorf("want error %v (%T), got %v (%T)", test.err1X, test.err1X, err, err)
|
|
}
|
|
if err != nil {
|
|
t.Log(test.name, err.Error())
|
|
return
|
|
}
|
|
if b == nil {
|
|
t.Error("got no output")
|
|
return
|
|
}
|
|
firstData := len(s.OutData)
|
|
s.Reuse = ReusePolicyAllow
|
|
b, re, err = Compress1X(buf0, &s)
|
|
if err != nil {
|
|
t.Errorf("got secondary error %v (%T)", err, err)
|
|
return
|
|
}
|
|
if !re {
|
|
t.Error("Didn't re-use even if data was the same")
|
|
}
|
|
if len(s.OutTable) != 0 {
|
|
t.Error("got table definition, don't want any")
|
|
}
|
|
if len(s.OutData) == 0 {
|
|
t.Error("got no data output")
|
|
}
|
|
if len(b) != firstData {
|
|
t.Errorf("data length did not match first: %d, second:%d", firstData, len(b))
|
|
}
|
|
t.Logf("%s: %d -> %d bytes (%.2f:1) %t", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)), re)
|
|
})
|
|
}
|
|
}
|
|
|
|
func BenchmarkDeflate(b *testing.B) {
|
|
for _, tt := range testfiles {
|
|
test := tt
|
|
if test.err1X != nil {
|
|
continue
|
|
}
|
|
b.Run(test.name, func(b *testing.B) {
|
|
dec, err := flate.NewWriter(ioutil.Discard, flate.HuffmanOnly)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if test.err1X != nil {
|
|
b.Skip("skipping")
|
|
}
|
|
buf0, err := test.fn()
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if len(buf0) > BlockSizeMax {
|
|
buf0 = buf0[:BlockSizeMax]
|
|
}
|
|
b.ResetTimer()
|
|
b.ReportAllocs()
|
|
b.SetBytes(int64(len(buf0)))
|
|
for i := 0; i < b.N; i++ {
|
|
dec.Reset(ioutil.Discard)
|
|
n, err := dec.Write(buf0)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if n != len(buf0) {
|
|
b.Fatal("mismatch", n, len(buf0))
|
|
}
|
|
dec.Close()
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func BenchmarkCompress1XReuseNone(b *testing.B) {
|
|
for _, tt := range testfiles {
|
|
test := tt
|
|
if test.err1X != nil {
|
|
continue
|
|
}
|
|
b.Run(test.name, func(b *testing.B) {
|
|
var s Scratch
|
|
s.Reuse = ReusePolicyNone
|
|
buf0, err := test.fn()
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if len(buf0) > BlockSizeMax {
|
|
buf0 = buf0[:BlockSizeMax]
|
|
}
|
|
_, re, err := Compress1X(buf0, &s)
|
|
if err != test.err1X {
|
|
b.Fatal("unexpected error:", err)
|
|
}
|
|
b.ResetTimer()
|
|
b.ReportAllocs()
|
|
b.SetBytes(int64(len(buf0)))
|
|
for i := 0; i < b.N; i++ {
|
|
_, re, _ = Compress1X(buf0, &s)
|
|
if re {
|
|
b.Fatal("reused")
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func BenchmarkCompress1XReuseAllow(b *testing.B) {
|
|
for _, tt := range testfiles {
|
|
test := tt
|
|
if test.err1X != nil {
|
|
continue
|
|
}
|
|
b.Run(test.name, func(b *testing.B) {
|
|
var s Scratch
|
|
s.Reuse = ReusePolicyAllow
|
|
buf0, err := test.fn()
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if len(buf0) > BlockSizeMax {
|
|
buf0 = buf0[:BlockSizeMax]
|
|
}
|
|
_, re, err := Compress1X(buf0, &s)
|
|
if err != test.err1X {
|
|
b.Fatal("unexpected error:", err)
|
|
}
|
|
b.ResetTimer()
|
|
b.ReportAllocs()
|
|
b.SetBytes(int64(len(buf0)))
|
|
for i := 0; i < b.N; i++ {
|
|
_, re, _ = Compress1X(buf0, &s)
|
|
if !re {
|
|
b.Fatal("not reused")
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func BenchmarkCompress1XReusePrefer(b *testing.B) {
|
|
for _, tt := range testfiles {
|
|
test := tt
|
|
if test.err1X != nil {
|
|
continue
|
|
}
|
|
b.Run(test.name, func(b *testing.B) {
|
|
var s Scratch
|
|
s.Reuse = ReusePolicyPrefer
|
|
buf0, err := test.fn()
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if len(buf0) > BlockSizeMax {
|
|
buf0 = buf0[:BlockSizeMax]
|
|
}
|
|
_, re, err := Compress1X(buf0, &s)
|
|
if err != test.err1X {
|
|
b.Fatal("unexpected error:", err)
|
|
}
|
|
b.ResetTimer()
|
|
b.ReportAllocs()
|
|
b.SetBytes(int64(len(buf0)))
|
|
for i := 0; i < b.N; i++ {
|
|
_, re, _ = Compress1X(buf0, &s)
|
|
if !re {
|
|
b.Fatal("not reused")
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func BenchmarkCompress4XReuseNone(b *testing.B) {
|
|
for _, tt := range testfiles {
|
|
test := tt
|
|
if test.err4X != nil {
|
|
continue
|
|
}
|
|
b.Run(test.name, func(b *testing.B) {
|
|
var s Scratch
|
|
s.Reuse = ReusePolicyNone
|
|
buf0, err := test.fn()
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if len(buf0) > BlockSizeMax {
|
|
buf0 = buf0[:BlockSizeMax]
|
|
}
|
|
_, re, err := Compress4X(buf0, &s)
|
|
if err != test.err1X {
|
|
b.Fatal("unexpected error:", err)
|
|
}
|
|
b.ResetTimer()
|
|
b.ReportAllocs()
|
|
b.SetBytes(int64(len(buf0)))
|
|
for i := 0; i < b.N; i++ {
|
|
_, re, _ = Compress4X(buf0, &s)
|
|
if re {
|
|
b.Fatal("reused")
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func BenchmarkCompress4XReuseAllow(b *testing.B) {
|
|
for _, tt := range testfiles {
|
|
test := tt
|
|
if test.err4X != nil {
|
|
continue
|
|
}
|
|
b.Run(test.name, func(b *testing.B) {
|
|
var s Scratch
|
|
s.Reuse = ReusePolicyAllow
|
|
buf0, err := test.fn()
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if len(buf0) > BlockSizeMax {
|
|
buf0 = buf0[:BlockSizeMax]
|
|
}
|
|
_, re, err := Compress4X(buf0, &s)
|
|
if err != test.err1X {
|
|
b.Fatal("unexpected error:", err)
|
|
}
|
|
b.ResetTimer()
|
|
b.ReportAllocs()
|
|
b.SetBytes(int64(len(buf0)))
|
|
for i := 0; i < b.N; i++ {
|
|
_, re, _ = Compress4X(buf0, &s)
|
|
if !re {
|
|
b.Fatal("not reused")
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func BenchmarkCompress4XReusePrefer(b *testing.B) {
|
|
for _, tt := range testfiles {
|
|
test := tt
|
|
if test.err4X != nil {
|
|
continue
|
|
}
|
|
b.Run(test.name, func(b *testing.B) {
|
|
var s Scratch
|
|
s.Reuse = ReusePolicyPrefer
|
|
buf0, err := test.fn()
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if len(buf0) > BlockSizeMax {
|
|
buf0 = buf0[:BlockSizeMax]
|
|
}
|
|
_, re, err := Compress4X(buf0, &s)
|
|
if err != test.err4X {
|
|
b.Fatal("unexpected error:", err)
|
|
}
|
|
b.ResetTimer()
|
|
b.ReportAllocs()
|
|
b.SetBytes(int64(len(buf0)))
|
|
for i := 0; i < b.N; i++ {
|
|
_, re, _ = Compress4X(buf0, &s)
|
|
if !re {
|
|
b.Fatal("not reused")
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|