-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdecompress.go
214 lines (178 loc) · 5.98 KB
/
decompress.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
package safexz
import (
	"bytes"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sync/atomic"

	internal "github.com/christoofar/safexz/internal"
)
// DecompressString is a convenience wrapper that decompresses a block of
// LZMA/xz data and renders the result as a string.
// Note: Strings are heavily dependent on the codepage you use to work with.
// If what you are compressing falls outside the scope of the codepage, you
// probably won't get the same string back.
func DecompressString(compressedString []byte) (string, error) {
	raw, err := DecompressBytes(compressedString)
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
// DecompressBytes takes a byte slice of compressed LZMA data and returns the decompressed data as a byte slice. Be careful with this function
// as it can consume a lot of memory if you are decompressing a large file and you've read the entire file into memory into a byte slice.
// If you are decompressing a large file, consider using DecompressFile instead or use DecompressStream to decompress the data on the fly which
// uses much less memory and is more efficient.
func DecompressBytes(compressedBytes []byte) ([]byte, error) {
	readchan := make(chan []byte, 1)
	writechan := make(chan []byte, 1)
	internal.DecompressIn(readchan, writechan)

	// Feed the decompressor in MAX_BUF_SIZE slices from a separate goroutine
	// while this goroutine drains the output side below.
	go func() {
		defer close(readchan)
		total := len(compressedBytes)
		for start := 0; start < total; start += internal.MAX_BUF_SIZE {
			end := start + internal.MAX_BUF_SIZE
			if end > total {
				end = total
			}
			readchan <- compressedBytes[start:end]
		}
	}()

	// Collect every decompressed chunk until the decompressor closes writechan.
	var result []byte
	for chunk := range writechan {
		result = append(result, chunk...)
	}
	return result, nil
}
// DecompressFile reads a file from the filesystem and decompresses it to another file on the filesystem.
// The input file should end with the `.xz` extension. It is equivalent to calling
// DecompressFileWithProgress with a nil progress callback.
func DecompressFile(input_path, output_path string) error {
	err := DecompressFileWithProgress(input_path, output_path, nil)
	return err
}
// DecompressFileWithProgress reads a file from the filesystem and decompresses it to another file on the filesystem. The input file should end with the `.xz` extension.
// Your progress callback function that you supply will be called with the number of bytes read and written to the output file. This is useful for showing progress bars.
// The first 'uint64' is the number of bytes read from the input file, and the second 'uint64' is the number of bytes written to the output file. From this you can calculate
// the percentage of the file that has been compressed, the estimated time remaining, etc.
func DecompressFileWithProgress(inpath, outpath string, progress func(uint64, uint64)) error {
	// Check the file extension. filepath.Ext returns "" for an extension-less
	// path, which the previous index-slice approach would have panicked on,
	// and the error must name the *input* path, not the output path.
	if filepath.Ext(inpath) != ".xz" {
		return fmt.Errorf("the input file [%s] should probably have an xz extension, can you go look?", inpath)
	}

	f, err := os.Open(inpath)
	if err != nil {
		return err
	}
	// Safe to defer: we only return after writechan closes, which happens
	// after the reader goroutine has finished with f.
	defer f.Close()

	readchan := make(chan []byte, 1)
	writechan := make(chan []byte, 1)
	internal.DecompressIn(readchan, writechan)

	// Both counters are touched from two goroutines (the reader updates
	// readCount, the writer updates writeCount, and each reads the other's
	// counter when reporting progress), so access them atomically.
	var readCount uint64
	var writeCount uint64

	readfunc := func() {
		readbuf := make([]byte, internal.MAX_BUF_SIZE)
		for {
			n, rerr := f.Read(readbuf)
			total := atomic.AddUint64(&readCount, uint64(n))
			if progress != nil && total%16550 == 0 {
				// NOTE(review): this only fires when the running total lands
				// exactly on a multiple of 16550 — rare with large reads. Kept
				// as-is because the write side reports on every chunk anyway.
				progress(total, atomic.LoadUint64(&writeCount))
			}
			if rerr != nil { // The EOF (or a read failure) has been hit, send the final batch
				readchan <- readbuf[:n]
				close(readchan)
				break
			}
			// Copy before sending: readbuf is reused on the next iteration.
			data := make([]byte, n)
			copy(data, readbuf)
			readchan <- data
		}
	}

	// If the outpath exists, delete it (preserves the fresh-file semantics
	// rather than truncating in place, which would keep old permissions).
	if _, err := os.Stat(outpath); err == nil {
		err := os.Remove(outpath)
		if err != nil {
			return err
		}
	}

	outfile, err := os.Create(outpath)
	if err != nil {
		return err
	}

	go readfunc()

	donewrite := make(chan bool, 1)
	var writeErr error
	go func() {
		for data := range writechan {
			if writeErr == nil {
				if _, werr := outfile.Write(data); werr != nil {
					// Remember the first failure but keep draining writechan so
					// the decompressor goroutine is not left blocked forever.
					writeErr = werr
				}
			}
			if len(data) > 0 {
				written := atomic.AddUint64(&writeCount, uint64(len(data)))
				if progress != nil {
					progress(atomic.LoadUint64(&readCount), written)
				}
			}
		}
		donewrite <- true
	}()

	<-donewrite
	// Surface a Close failure (e.g. deferred flush error) unless a write
	// error already occurred — the first error wins.
	if cerr := outfile.Close(); writeErr == nil {
		writeErr = cerr
	}
	return writeErr
}
// DecompressFileToMemory reads a file from the filesystem and decompresses it into memory as a byte slice buffer.
// This is useful for small files that you want to decompress and then work with in memory, such as scanning compressed logs.
func DecompressFileToMemory(path string) ([]byte, error) {
	// Check the file extension. filepath.Ext returns "" for an extension-less
	// path, which the previous index-slice approach would have panicked on.
	if filepath.Ext(path) != ".xz" {
		return []byte{}, fmt.Errorf("the input file [%s] should probably have an xz extension, can you go look?", path)
	}

	f, err := os.Open(path)
	if err != nil {
		return []byte{}, err
	}
	// Safe to defer: outputchan only closes after the reader goroutine has
	// finished with f, so the file is no longer in use when we return.
	defer f.Close()

	inputchan := make(chan []byte, 1)
	outputchan := make(chan []byte, 1)
	go func() {
		internal.DecompressIn(inputchan, outputchan)
	}()

	go func() {
		for {
			// A fresh buffer per read: each slice is handed to the
			// decompressor goroutine and must not be reused.
			readbuf := make([]byte, internal.MAX_BUF_SIZE)
			n, rerr := f.Read(readbuf)
			if rerr != nil {
				// EOF or a read failure: per the io.Reader contract n may be
				// non-zero here, so forward the final bytes before closing.
				inputchan <- readbuf[:n]
				close(inputchan)
				return
			}
			inputchan <- readbuf[:n]
		}
	}()

	outputbuf := bytes.Buffer{}
	for data := range outputchan {
		_, err := outputbuf.Write(data)
		if err != nil {
			return []byte{}, err
		}
	}
	return outputbuf.Bytes(), nil
}
// DecompressStream skips a call to io.Copy() by just decompressing whatever stream you put in the
// input reader and writing it to the output writer. Useful when you want to decompress data on the fly
// from a stream source like a network connection or a websocket. Note: Neiher CompressStream nor DecompressStream
// actually use XZReader or XZWriter. They are just there for the sake of the ABI.
func DecompressStream(input io.Reader, output io.Writer) error {
	inputchan := make(chan []byte, 1)
	outputchan := make(chan []byte, 1)
	go func() {
		internal.DecompressIn(inputchan, outputchan)
	}()

	// Feed the decompressor from its own goroutine so reading the input and
	// writing the output run concurrently. Reading the whole input first (as
	// before) deadlocks on anything but tiny streams once the size-1 channel
	// buffers fill up.
	go func() {
		defer close(inputchan)
		for {
			// A fresh buffer per read: each slice is handed to the decompressor
			// goroutine, so the buffer must not be reused (the previous single
			// shared buffer corrupted in-flight chunks).
			readbuf := make([]byte, internal.MAX_BUF_SIZE)
			n, err := input.Read(readbuf)
			if n > 0 {
				// io.Reader may return bytes together with an error (including
				// io.EOF); forward them before treating the error as the end.
				inputchan <- readbuf[:n]
			}
			if err != nil {
				return
			}
		}
	}()

	for data := range outputchan {
		_, err := output.Write(data)
		if err != nil {
			return err
		}
	}
	return nil
}