diff --git a/go.mod b/go.mod index b5e29fe707..c04f91bd3c 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,7 @@ require ( golang.org/x/crypto v0.12.0 golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 golang.org/x/sync v0.3.0 + github.com/icza/bitio v1.1.0 ) require ( diff --git a/go.sum b/go.sum index db1b924c9e..5e83fcfbc6 100644 --- a/go.sum +++ b/go.sum @@ -18,6 +18,9 @@ github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/pprof v0.0.0-20230817174616-7a8ec2ada47b h1:h9U78+dx9a4BKdQkBBos92HalKpaGKHrp+3Uo6yTodo= github.com/google/pprof v0.0.0-20230817174616-7a8ec2ada47b/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= +github.com/icza/bitio v1.1.0 h1:ysX4vtldjdi3Ygai5m1cWy4oLkhWTAi+SyO6HC8L9T0= +github.com/icza/bitio v1.1.0/go.mod h1:0jGnlLAx8MKMr9VGnn/4YrvZiprkvBelsVIbA9Jjr9A= +github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6/go.mod h1:xQig96I1VNBDIWGCdTt54nHt6EeI639SmHycLYL7FkA= github.com/klauspost/compress v1.17.1 h1:NE3C767s2ak2bweCZo3+rdP4U/HoyVXLv/X9f2gPS5g= github.com/klauspost/compress v1.17.1/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= diff --git a/std/compress/pipeline.go b/std/compress/pipeline.go index 274fcfcc8f..e241779572 100644 --- a/std/compress/pipeline.go +++ b/std/compress/pipeline.go @@ -1,5 +1,12 @@ package compress +import ( + "bytes" + "fmt" + "github.com/icza/bitio" + "strconv" +) + // Streams and pipelines are inefficient data structures used for easy experimentation with compression algorithms. // They make it easy to swap modules in and out. @@ -8,11 +15,11 @@ type Stream struct { NbSymbs int } -func (s Stream) Len() int { +func (s *Stream) Len() int { return len(s.D) } -func (s Stream) RunLen(i int) int { +func (s *Stream) RunLen(i int) int { runLen := 1 for i+runLen < len(s.D) && s.D[i+runLen] == 0 { runLen++ @@ -20,7 +27,7 @@ func (s Stream) RunLen(i int) int { return runLen } -func (s Stream) At(i int) int { +func (s *Stream) At(i int) int { return s.D[i] } @@ -32,6 +39,25 @@ func NewStreamFromBytes(in []byte) Stream { return Stream{d, 256} } +func NewStream[V any](slice []V, srcBitLen, streamBitLen int) Stream { + if srcBitLen%streamBitLen != 0 { + panic("not implemented") + } + dstPerSrc := srcBitLen / streamBitLen + d := make([]int, dstPerSrc*len(slice)) + + for i := range d { + if intVal, err := strconv.Atoi(fmt.Sprint(slice[i])); err != nil { // not intended to be fast + panic(err) + } else { + indexWithinWord := i % dstPerSrc + d[i] = (uint(intVal) >> (streamBitLen * indexWithinWord)) & ((1 << streamBitLen) - 1) + } + } + + return Stream{d, 1 << streamBitLen} +} + type Pipeline []func(Stream) Stream func (pipeline Pipeline) Run(in Stream) Stream { @@ -40,3 +66,89 @@ func (pipeline Pipeline) Run(in Stream) Stream { } return in } + +func (s *Stream) WriteNum(r int, nbWords int) *Stream { + for i := 0; i < nbWords; i++ { + s.D = append(s.D, r%s.NbSymbs) + r /= s.NbSymbs + } + if r != 0 { + panic("overflow") + } + return s +} + +func (s *Stream) ReadNum(start, nbWords int) int { + res := 0 + for j := nbWords - 1; j >= 0; j-- { + res *= s.NbSymbs + res += s.D[start+j] + } + return res +} + +func bitLen(n int) int { + bitLen := 0 + for 1<