Skip to content

Commit

Permalink
refactor: update snappy frame decompress
Browse files Browse the repository at this point in the history
  • Loading branch information
wemeetagain committed Jan 6, 2025
1 parent ad8c10e commit d39ecb1
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 49 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,38 @@
import crc32c from "@chainsafe/fast-crc32c";

/**
* Chunk type identifiers used by the snappy framing format.
* The decoder reads this value from the first byte of every chunk.
*/
export enum ChunkType {
// Stream identifier chunk; IDENTIFIER_FRAME starts with this byte.
IDENTIFIER = 0xff,
// Chunk whose payload is snappy-compressed data.
COMPRESSED = 0x00,
// Chunk whose payload is stored without compression.
UNCOMPRESSED = 0x01,
// Padding chunk; the decoder skips over its contents.
PADDING = 0xfe,
// Stands in for every skippable chunk type: the decoder maps the whole range 0x80-0xfd to this member.
SKIPPABLE = 0x80,
}

// Payload of the stream identifier chunk: the ASCII bytes "sNaPpY".
export const IDENTIFIER = Buffer.from([0x73, 0x4e, 0x61, 0x50, 0x70, 0x59]);
// Complete stream identifier chunk: type byte 0xff, a 3-byte little-endian length (6), then the "sNaPpY" payload.
export const IDENTIFIER_FRAME = Buffer.from([0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59]);

/**
* As per the snappy framing format for streams, the size of any uncompressed chunk can be
* no longer than 65536 bytes.
*
* From: https://github.com/google/snappy/blob/main/framing_format.txt#L90:L92
*/
export const UNCOMPRESSED_CHUNK_SIZE = 65536;

/**
 * Computes the masked CRC-32C checksum used by the snappy framing format.
 *
 * Masking is defined in section 3 of
 * https://github.com/google/snappy/blob/master/framing_format.txt
 * Reference implementations:
 * Go: https://github.com/golang/snappy/blob/2e65f85255dbc3072edf28d6b5b8efc472979f5a/snappy.go#L97
 * Python: https://github.com/andrix/python-snappy/blob/602e9c10d743f71bef0bac5e4c4dffa17340d7b3/snappy/snappy.py#L70
 *
 * @param value - the bytes to checksum
 * @returns a 4-byte buffer holding the masked checksum in little-endian order
 */
export function crc(value: Uint8Array): Buffer {
  // The cast only satisfies the binding's typings; it does not actually require a Buffer.
  // See https://github.com/napi-rs/node-rs/blob/main/packages/crc32/index.d.ts
  const raw = crc32c.calculate(value as Buffer);

  // Rotate right by 15 bits. `>>>` keeps the upper 17 bits unsigned, and the lower
  // 15 bits are masked with 0x7fff (32 - 17 = 15) before moving to the top.
  const upperBits = raw >>> 15;
  const lowerBits = (raw & 0x7fff) << 17;
  const rotated = upperBits | lowerBits;

  // Add the masking constant; the final `>>> 0` wraps the sum to an unsigned 32-bit value.
  const masked = (rotated + 0xa282ead8) >>> 0;

  const out = Buffer.allocUnsafe?.(4) ?? Buffer.alloc(4);
  out.writeUInt32LE(masked, 0);
  return out;
}
Original file line number Diff line number Diff line change
@@ -1,33 +1,8 @@
import crc32c from "@chainsafe/fast-crc32c";
import snappy from "snappy";
import {ChunkType, IDENTIFIER_FRAME} from "./common.js";
import {crc, ChunkType, IDENTIFIER_FRAME, UNCOMPRESSED_CHUNK_SIZE} from "./common.js";

// The logic in this file is largely copied (in simplified form) from https://github.com/ChainSafe/node-snappy-stream/

/**
* As per the snappy framing format for streams, the size of any uncompressed chunk can be
* no longer than 65536 bytes.
*
* From: https://github.com/google/snappy/blob/main/framing_format.txt#L90:L92
*/
const UNCOMPRESSED_CHUNK_SIZE = 65536;

/**
 * Produces the masked CRC-32C of `value` as required by the snappy framing format
 * (section 3 of https://github.com/google/snappy/blob/master/framing_format.txt).
 *
 * Other implementations for reference:
 * Go: https://github.com/golang/snappy/blob/2e65f85255dbc3072edf28d6b5b8efc472979f5a/snappy.go#L97
 * Python: https://github.com/andrix/python-snappy/blob/602e9c10d743f71bef0bac5e4c4dffa17340d7b3/snappy/snappy.py#L70
 *
 * @param value - the chunk data to checksum
 * @returns a 4-byte buffer containing the masked checksum, little-endian
 */
function checksum(value: Buffer): Buffer {
  const digest = crc32c.calculate(value);

  // Rotate right by 15: the unsigned shift supplies the top 17 bits of the digest,
  // while the bottom 15 bits (mask 0x7fff) are moved up to the high end.
  const rotated = (digest >>> 15) | ((digest & 0x7fff) << 17);

  const output = Buffer.allocUnsafe?.(4) ?? Buffer.alloc(4);
  // `>>> 0` wraps the sum with the masking constant into an unsigned 32-bit value.
  output.writeUInt32LE((rotated + 0xa282ead8) >>> 0, 0);
  return output;
}

export async function* encodeSnappy(bytes: Buffer): AsyncGenerator<Buffer> {
yield IDENTIFIER_FRAME;

Expand All @@ -36,17 +11,13 @@ export async function* encodeSnappy(bytes: Buffer): AsyncGenerator<Buffer> {
const compressed = snappy.compressSync(chunk);
if (compressed.length < chunk.length) {
const size = compressed.length + 4;
yield Buffer.concat([
Buffer.from([ChunkType.COMPRESSED, size, size >> 8, size >> 16]),
checksum(chunk),
compressed,
]);
yield Buffer.concat([Buffer.from([ChunkType.COMPRESSED, size, size >> 8, size >> 16]), crc(chunk), compressed]);
} else {
const size = chunk.length + 4;
yield Buffer.concat([
//
Buffer.from([ChunkType.UNCOMPRESSED, size, size >> 8, size >> 16]),
checksum(chunk),
crc(chunk),
chunk,
]);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import {uncompress} from "snappyjs";
import {Uint8ArrayList} from "uint8arraylist";
import {ChunkType, IDENTIFIER} from "./common.js";
import {ChunkType, crc, IDENTIFIER, UNCOMPRESSED_CHUNK_SIZE} from "./common.js";

export class SnappyFramesUncompress {
private buffer = new Uint8ArrayList();
Expand All @@ -21,32 +21,45 @@ export class SnappyFramesUncompress {
if (this.buffer.length < 4) break;

const type = getChunkType(this.buffer.get(0));

if (!this.state.foundIdentifier && type !== ChunkType.IDENTIFIER) {
throw "malformed input: must begin with an identifier";
}

const frameSize = getFrameSize(this.buffer, 1);

if (this.buffer.length - 4 < frameSize) {
break;
}

const data = this.buffer.subarray(4, 4 + frameSize);
const frame = this.buffer.subarray(4, 4 + frameSize);
this.buffer.consume(4 + frameSize);

if (!this.state.foundIdentifier && type !== ChunkType.IDENTIFIER) {
throw "malformed input: must begin with an identifier";
}

if (type === ChunkType.IDENTIFIER) {
if (!Buffer.prototype.equals.call(data, IDENTIFIER)) {
throw "malformed input: bad identifier";
switch (type) {
case ChunkType.IDENTIFIER: {
if (!Buffer.prototype.equals.call(frame, IDENTIFIER)) {
throw "malformed input: bad identifier";
}
this.state.foundIdentifier = true;
continue;
}
this.state.foundIdentifier = true;
continue;
}
case ChunkType.PADDING:
case ChunkType.SKIPPABLE:
continue;
case ChunkType.COMPRESSED:
case ChunkType.UNCOMPRESSED: {
const checksum = frame.subarray(0, 4);
const data = frame.subarray(4);

if (type === ChunkType.COMPRESSED) {
result.append(uncompress(data.subarray(4)));
}
if (type === ChunkType.UNCOMPRESSED) {
result.append(data.subarray(4));
const uncompressed = type === ChunkType.COMPRESSED ? uncompress(data, UNCOMPRESSED_CHUNK_SIZE) : data;
if (uncompressed.length > UNCOMPRESSED_CHUNK_SIZE) {
throw "malformed input: too large";
}
if (crc(uncompressed).compare(checksum) !== 0) {
throw "malformed input: bad checksum";
}
result.append(uncompressed);
}
}
}
if (result.length === 0) {
Expand Down Expand Up @@ -82,6 +95,10 @@ function getChunkType(value: number): ChunkType {
case ChunkType.PADDING:
return ChunkType.PADDING;
default:
// https://github.com/google/snappy/blob/main/framing_format.txt#L129
if (value >= 0x80 && value <= 0xfd) {
return ChunkType.SKIPPABLE;
}
throw new Error("Unsupported snappy chunk type");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {Uint8ArrayList} from "uint8arraylist";
import {describe, expect, it} from "vitest";
import {encodeSnappy} from "../../../../../src/encodingStrategies/sszSnappy/snappyFrames/compress.js";
import {SnappyFramesUncompress} from "../../../../../src/encodingStrategies/sszSnappy/snappyFrames/uncompress.js";
import {ChunkType, crc, IDENTIFIER_FRAME} from "../../../../../src/encodingStrategies/sszSnappy/snappyFrames/common.js";

describe("encodingStrategies / sszSnappy / snappy frames / uncompress", () => {
it("should work with short input", () =>
Expand Down Expand Up @@ -56,4 +57,43 @@ describe("encodingStrategies / sszSnappy / snappy frames / uncompress", () => {

expect(decompress.uncompress(new Uint8ArrayList(Buffer.alloc(3, 1)))).toBe(null);
});

it("should detect invalid checksum", () => {
const chunks = new Uint8ArrayList();
chunks.append(IDENTIFIER_FRAME);

chunks.append(Uint8Array.from([ChunkType.UNCOMPRESSED, 0x80, 0x00, 0x00]));
// first 4 bytes are checksum
// 0xffffffff is clearly an invalid checksum
chunks.append(Uint8Array.from(Array.from({length: 0x80}, () => 0xff)));

const decompress = new SnappyFramesUncompress();
expect(() => decompress.uncompress(chunks)).toThrow(/checksum/);
});

it("should detect skippable frames", () => {
const chunks = new Uint8ArrayList();
chunks.append(IDENTIFIER_FRAME);

chunks.append(Uint8Array.from([ChunkType.SKIPPABLE, 0x80, 0x00, 0x00]));
chunks.append(Uint8Array.from(Array.from({length: 0x80}, () => 0xff)));

const decompress = new SnappyFramesUncompress();
expect(decompress.uncompress(chunks)).toBeNull();
});

it("should detect large data", () => {
const chunks = new Uint8ArrayList();
chunks.append(IDENTIFIER_FRAME);

// add a chunk of size 100000
chunks.append(Uint8Array.from([ChunkType.UNCOMPRESSED, 160, 134, 1]));
const data = Uint8Array.from(Array.from({length: 100000 - 4}, () => 0xff));
const checksum = crc(data);
chunks.append(checksum);
chunks.append(data);

const decompress = new SnappyFramesUncompress();
expect(() => decompress.uncompress(chunks)).toThrow(/large/);
});
});

0 comments on commit d39ecb1

Please sign in to comment.