Skip to content

Commit

Permalink
feat: return a list of RPM packages for easier processing
Browse files Browse the repository at this point in the history
Packages up until now were returned as a single string with each new line containing the package data.
Readers of this output had to do further processing to extract the relevant package data.
This single-string format was added to the plugin in order to match what the snyk-docker-plugin was expecting; it was used for backwards-compatibility.

Now in addition to this we return the list of packages in a new field in the parser response so it is easier to process them.

Additionally changed the following:
- exceptions thrown by parsing the RPM package entries are no longer ignored
- exceptions are now allowed to propagate to the caller of the parser
- improved docs around some types
  • Loading branch information
ivanstanev committed Oct 11, 2020
1 parent 944fb0b commit 89482a5
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 86 deletions.
4 changes: 2 additions & 2 deletions lib/berkeleydb/database-pages.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { DATABASE_PAGE_HEADER_SIZE, HASH_INDEX_ENTRY_SIZE } from './types';
import { ParserError } from '../types';
import { RpmParserError } from '../types';

/**
* Extract the values from a hash index, which is stored in a Hash DB page.
Expand All @@ -16,7 +16,7 @@ export function bufferToHashIndexValues(
// Hash table entries are always stored in pairs of 2.
if (entries % 2 !== 0) {
const pageNumber = page.readUInt32LE(8);
throw new ParserError('The number of entries must be a multiple of 2', {
throw new RpmParserError('The number of entries must be a multiple of 2', {
entries,
pageNumber,
});
Expand Down
4 changes: 2 additions & 2 deletions lib/berkeleydb/hash-pages.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { DATABASE_PAGE_HEADER_SIZE, HashPageType } from './types';
import { ParserError } from '../types';
import { RpmParserError } from '../types';

/**
* Traverse the data (overflow) pages and extract the data.
Expand All @@ -24,7 +24,7 @@ export function bufferToHashValueContent(
// Note there may be 5 different page types of varying length, but we are interested in only one.
const pageType = page.readUInt8(pageStartOffset);
if (pageType !== HashPageType.H_OFFPAGE) {
throw new ParserError('Unsupported page type', { pageType });
throw new RpmParserError('Unsupported page type', { pageType });
}

const startPageNumber = page.readUInt32LE(pageStartOffset + 4);
Expand Down
24 changes: 9 additions & 15 deletions lib/berkeleydb/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { eventLoopSpinner } from 'event-loop-spinner';
import { bufferToHashIndexValues } from './database-pages';
import { bufferToHashValueContent } from './hash-pages';
import { MagicNumber, DatabasePageType, HashPageType } from './types';
import { ParserError } from '../types';
import { RpmParserError } from '../types';

export { bufferToHashIndexValues, bufferToHashValueContent };

Expand Down Expand Up @@ -31,13 +31,8 @@ const validPageSizes: ReadonlyArray<number> = [
* - An Overflow page -- this page contains the data. The data may span multiple pages (hence "overflow" pages).
* @param data The contents of a BerkeleyDB database.
*/
export async function bufferToHashDbValues(
data: Buffer,
): Promise<Buffer[] | never> {
validateBerkeleyDbMetadata(data);

export async function bufferToHashDbValues(data: Buffer): Promise<Buffer[]> {
const pageSize = data.readUInt32LE(20);
validatePageSize(pageSize);

const lastPageNumber = data.readUInt32LE(32);

Expand Down Expand Up @@ -85,25 +80,24 @@ export async function bufferToHashDbValues(
return result;
}

/**
* Exported for testing
*/
export function validateBerkeleyDbMetadata(data: Buffer): void | never {
// We are only interested in Hash DB. Other types are B-Tree, Queue, Heap, etc.
const magicNumber = data.readUInt32LE(12);
if (magicNumber !== MagicNumber.DB_HASH) {
throw new ParserError('Unexpected database magic number', { magicNumber });
throw new RpmParserError('Unexpected database magic number', {
magicNumber,
});
}

// The first page of the database must be a Hash DB metadata page.
const pageType = data.readUInt8(25);
if (pageType !== DatabasePageType.P_HASHMETA) {
throw new ParserError('Unexpected page type', { pageType });
throw new RpmParserError('Unexpected page type', { pageType });
}

const encryptionAlgorithm = data.readUInt8(24);
if (encryptionAlgorithm !== 0) {
throw new ParserError('Encrypted databases are not supported', {
throw new RpmParserError('Encrypted databases are not supported', {
encryptionAlgorithm,
});
}
Expand All @@ -113,7 +107,7 @@ export function validateBerkeleyDbMetadata(data: Buffer): void | never {
// packages on the system. We don't want to allocate too much memory.
const entriesCount = data.readUInt32LE(88);
if (entriesCount < 0 || entriesCount > 50_000) {
throw new ParserError('Invalid number of entries in the database', {
throw new RpmParserError('Invalid number of entries in the database', {
entriesCount,
});
}
Expand All @@ -124,6 +118,6 @@ export function validateBerkeleyDbMetadata(data: Buffer): void | never {
*/
export function validatePageSize(pageSize: number): void | never {
if (!validPageSizes.includes(pageSize)) {
throw new ParserError('Invalid page size', { pageSize });
throw new RpmParserError('Invalid page size', { pageSize });
}
}
71 changes: 32 additions & 39 deletions lib/index.ts
Original file line number Diff line number Diff line change
@@ -1,52 +1,45 @@
import { bufferToHashDbValues } from './berkeleydb';
import {
bufferToHashDbValues,
validateBerkeleyDbMetadata,
validatePageSize,
} from './berkeleydb';
import { bufferToPackageInfo } from './rpm';
import { PackageInfo } from './rpm/types';
import { IParserResponse } from './types';
import { RpmParserResponse } from './types';

/**
* Get a list of packages given a Buffer that contains an RPM database in BerkeleyDB format.
* The database is inspected as best-effort, returning all valid/readable entries.
* @param data An RPM database in BerkeleyDB format.
* @deprecated Should use snyk/dep-graph. The response format is kept for backwards compatibility with snyk/kubernetes-monitor.
*/
export async function getPackages(data: Buffer): Promise<IParserResponse> {
try {
const berkeleyDbValues = await bufferToHashDbValues(data);

let packagesSkipped = 0;
let packagesProcessed = 0;

const rpmPackageInfos = new Array<PackageInfo>();
for (const entry of berkeleyDbValues) {
try {
const packageInfo = await bufferToPackageInfo(entry);
if (packageInfo !== undefined) {
rpmPackageInfos.push(packageInfo);
packagesProcessed += 1;
} else {
packagesSkipped += 1;
}
} catch (error) {
packagesSkipped += 1;
}
}
export async function getPackages(data: Buffer): Promise<RpmParserResponse> {
validateBerkeleyDbMetadata(data);

const pageSize = data.readUInt32LE(20);
validatePageSize(pageSize);

const berkeleyDbValues = await bufferToHashDbValues(data);

let packagesSkipped = 0;

const formattedPackages = formatRpmPackages(rpmPackageInfos);
const response = formattedPackages.join('\n');

return {
response,
rpmMetadata: {
packagesProcessed,
packagesSkipped,
},
};
} catch (error) {
return {
response: '',
error,
};
const rpmPackageInfos = new Array<PackageInfo>();
for (const entry of berkeleyDbValues) {
const packageInfo = await bufferToPackageInfo(entry);
if (packageInfo !== undefined) {
rpmPackageInfos.push(packageInfo);
} else {
packagesSkipped += 1;
}
}

const formattedPackages = formatRpmPackages(rpmPackageInfos);
const response = formattedPackages.join('\n');

return {
response,
packagesSkipped,
packages: rpmPackageInfos,
};
}

function formatRpmPackages(packages: PackageInfo[]): string[] {
Expand Down
18 changes: 11 additions & 7 deletions lib/rpm/extensions.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { eventLoopSpinner } from 'event-loop-spinner';

import { IndexEntry, PackageInfo, RpmTag, RpmType } from './types';
import { ParserError } from '../types';
import { RpmParserError } from '../types';

/**
* Iterate through RPM metadata entries to build the full package data.
Expand All @@ -10,13 +10,17 @@ import { ParserError } from '../types';
export async function getPackageInfo(
entries: IndexEntry[],
): Promise<PackageInfo | undefined> {
/**
* All of the entries in an RPM package are optional.
* We try to collect as much info as possible and finally we check if it's enough to construct a package.
*/
const packageInfo: Partial<PackageInfo> = {};

for (const entry of entries) {
switch (entry.info.tag) {
case RpmTag.NAME:
if (entry.info.type !== RpmType.STRING) {
throw new ParserError('Unexpected type for name tag', {
throw new RpmParserError('Unexpected type for name tag', {
type: entry.info.type,
});
}
Expand All @@ -25,7 +29,7 @@ export async function getPackageInfo(

case RpmTag.RELEASE:
if (entry.info.type !== RpmType.STRING) {
throw new ParserError('Unexpected type for release tag', {
throw new RpmParserError('Unexpected type for release tag', {
type: entry.info.type,
});
}
Expand All @@ -34,7 +38,7 @@ export async function getPackageInfo(

case RpmTag.ARCH:
if (entry.info.type !== RpmType.STRING) {
throw new ParserError('Unexpected type for arch tag', {
throw new RpmParserError('Unexpected type for arch tag', {
type: entry.info.type,
});
}
Expand All @@ -43,7 +47,7 @@ export async function getPackageInfo(

case RpmTag.EPOCH:
if (entry.info.type !== RpmType.INT32) {
throw new ParserError('Unexpected type for epoch tag', {
throw new RpmParserError('Unexpected type for epoch tag', {
type: entry.info.type,
});
}
Expand All @@ -52,7 +56,7 @@ export async function getPackageInfo(

case RpmTag.SIZE:
if (entry.info.type !== RpmType.INT32) {
throw new ParserError('Unexpected type for size tag', {
throw new RpmParserError('Unexpected type for size tag', {
type: entry.info.type,
});
}
Expand All @@ -61,7 +65,7 @@ export async function getPackageInfo(

case RpmTag.VERSION:
if (entry.info.type !== RpmType.STRING) {
throw new ParserError('Unexpected type for version tag', {
throw new RpmParserError('Unexpected type for version tag', {
type: entry.info.type,
});
}
Expand Down
4 changes: 2 additions & 2 deletions lib/rpm/header.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { eventLoopSpinner } from 'event-loop-spinner';

import { IndexEntry, ENTRY_INFO_SIZE, EntryInfo } from './types';
import { ParserError } from '../types';
import { RpmParserError } from '../types';

/**
* Transform a blob of metadata into addressable RPM package entries.
Expand All @@ -14,7 +14,7 @@ export async function headerImport(data: Buffer): Promise<IndexEntry[]> {

if (indexLength <= 0 || indexLength > 50_000) {
// Ensure we don't allocate something crazy...
throw new ParserError('Invalid index length', { indexLength });
throw new RpmParserError('Invalid index length', { indexLength });
}

const entryInfos = new Array<EntryInfo>();
Expand Down
33 changes: 26 additions & 7 deletions lib/types.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,34 @@
export interface IParserResponse {
import { PackageInfo } from './rpm/types';

export interface RpmParserResponse {
/**
* Returns all packages detected in an RPM database file.
* Currently this is a flat list of dependencies and not a dependency graph.
*/
packages: PackageInfo[];

/**
* Returns the list of packages, each stored in a new line. Packages include the following attributes:
* name, version, epoch, size, architecture, and release. The attributes are delimited by a tab.
* @deprecated Use "packages" instead, which makes it easier to parse entries.
*/
response: string;
rpmMetadata?: IRpmMetadata;
error?: ParserError;
}

export interface IRpmMetadata {
packagesProcessed: number;
/**
* Contains a count of the entries that could not be processed as RPM packages.
* In all circumstances this should be 0. However, if it does contain skipped packages
* then it may indicate:
* 1. A corrupt or bad RPM package/entry, or
* 2. A limitation of the parser - possible reasons could be a new RPM version or a bug in the parsing logic.
*/
packagesSkipped: number;
}

export class ParserError extends Error {
/**
* Thrown on any encountered exception by the parser.
* Includes a "context" object to pass extra information about the error.
*/
export class RpmParserError extends Error {
readonly context: unknown | undefined;

constructor(message: string, context?: unknown) {
Expand Down
16 changes: 4 additions & 12 deletions test/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,10 @@ describe('Testing various RPM databases', () => {

const parserOutput = await getPackages(rpmDb);

expect(parserOutput.error).toBeUndefined();
expect(parserOutput.rpmMetadata).toBeDefined();
expect(parserOutput.rpmMetadata!.packagesSkipped).toEqual(0);

const expectedEntries = expectedOutput
.trim()
.split('\n')
.sort();
const parserEntries = parserOutput.response
.trim()
.split('\n')
.sort();
expect(parserOutput.packagesSkipped).toEqual(0);

const expectedEntries = expectedOutput.trim().split('\n').sort();
const parserEntries = parserOutput.response.trim().split('\n').sort();

for (let j = 0; j < expectedEntries.length; j++) {
const expectedEntry = expectedEntries[j];
Expand Down

0 comments on commit 89482a5

Please sign in to comment.