diff --git a/lib/berkeleydb/database-pages.ts b/lib/berkeleydb/database-pages.ts index f46ac04..7c5a78f 100644 --- a/lib/berkeleydb/database-pages.ts +++ b/lib/berkeleydb/database-pages.ts @@ -1,5 +1,5 @@ import { DATABASE_PAGE_HEADER_SIZE, HASH_INDEX_ENTRY_SIZE } from './types'; -import { ParserError } from '../types'; +import { RpmParserError } from '../types'; /** * Extract the values from a hash index, which is stored in a Hash DB page. @@ -16,7 +16,7 @@ export function bufferToHashIndexValues( // Hash table entries are always stored in pairs of 2. if (entries % 2 !== 0) { const pageNumber = page.readUInt32LE(8); - throw new ParserError('The number of entries must be a multiple of 2', { + throw new RpmParserError('The number of entries must be a multiple of 2', { entries, pageNumber, }); diff --git a/lib/berkeleydb/hash-pages.ts b/lib/berkeleydb/hash-pages.ts index 317df5e..e772b2c 100644 --- a/lib/berkeleydb/hash-pages.ts +++ b/lib/berkeleydb/hash-pages.ts @@ -1,5 +1,5 @@ import { DATABASE_PAGE_HEADER_SIZE, HashPageType } from './types'; -import { ParserError } from '../types'; +import { RpmParserError } from '../types'; /** * Traverse the data (overflow) pages and extract the data. @@ -24,7 +24,7 @@ export function bufferToHashValueContent( // Note there may be 5 different page types of varying length, but we are interested only one. 
const pageType = page.readUInt8(pageStartOffset); if (pageType !== HashPageType.H_OFFPAGE) { - throw new ParserError('Unsupported page type', { pageType }); + throw new RpmParserError('Unsupported page type', { pageType }); } const startPageNumber = page.readUInt32LE(pageStartOffset + 4); diff --git a/lib/berkeleydb/index.ts b/lib/berkeleydb/index.ts index 02eb364..40d71bb 100644 --- a/lib/berkeleydb/index.ts +++ b/lib/berkeleydb/index.ts @@ -3,7 +3,7 @@ import { eventLoopSpinner } from 'event-loop-spinner'; import { bufferToHashIndexValues } from './database-pages'; import { bufferToHashValueContent } from './hash-pages'; import { MagicNumber, DatabasePageType, HashPageType } from './types'; -import { ParserError } from '../types'; +import { RpmParserError } from '../types'; export { bufferToHashIndexValues, bufferToHashValueContent }; @@ -31,13 +31,8 @@ const validPageSizes: ReadonlyArray = [ * - An Overflow page -- this page contains the data. The data may span multiple pages (hence "overflow" pages). * @param data The contents of a BerkeleyDB database. */ -export async function bufferToHashDbValues( - data: Buffer, -): Promise { - validateBerkeleyDbMetadata(data); - +export async function bufferToHashDbValues(data: Buffer): Promise { const pageSize = data.readUInt32LE(20); - validatePageSize(pageSize); const lastPageNumber = data.readUInt32LE(32); @@ -85,25 +80,24 @@ export async function bufferToHashDbValues( return result; } -/** - * Exported for testing - */ export function validateBerkeleyDbMetadata(data: Buffer): void | never { // We are only interested in Hash DB. Other types are B-Tree, Queue, Heap, etc. const magicNumber = data.readUInt32LE(12); if (magicNumber !== MagicNumber.DB_HASH) { - throw new ParserError('Unexpected database magic number', { magicNumber }); + throw new RpmParserError('Unexpected database magic number', { + magicNumber, + }); } // The first page of the database must be a Hash DB metadata page. 
const pageType = data.readUInt8(25); if (pageType !== DatabasePageType.P_HASHMETA) { - throw new ParserError('Unexpected page type', { pageType }); + throw new RpmParserError('Unexpected page type', { pageType }); } const encryptionAlgorithm = data.readUInt8(24); if (encryptionAlgorithm !== 0) { - throw new ParserError('Encrypted databases are not supported', { + throw new RpmParserError('Encrypted databases are not supported', { encryptionAlgorithm, }); } @@ -113,7 +107,7 @@ export function validateBerkeleyDbMetadata(data: Buffer): void | never { // packages on the system. We don't want to allocate too much memory. const entriesCount = data.readUInt32LE(88); if (entriesCount < 0 || entriesCount > 50_000) { - throw new ParserError('Invalid number of entries in the database', { + throw new RpmParserError('Invalid number of entries in the database', { entriesCount, }); } @@ -124,6 +118,6 @@ export function validateBerkeleyDbMetadata(data: Buffer): void | never { */ export function validatePageSize(pageSize: number): void | never { if (!validPageSizes.includes(pageSize)) { - throw new ParserError('Invalid page size', { pageSize }); + throw new RpmParserError('Invalid page size', { pageSize }); } } diff --git a/lib/index.ts b/lib/index.ts index 0fae98b..414c14c 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -1,52 +1,45 @@ -import { bufferToHashDbValues } from './berkeleydb'; +import { + bufferToHashDbValues, + validateBerkeleyDbMetadata, + validatePageSize, +} from './berkeleydb'; import { bufferToPackageInfo } from './rpm'; import { PackageInfo } from './rpm/types'; -import { IParserResponse } from './types'; +import { RpmParserResponse } from './types'; /** * Get a list of packages given a Buffer that contains an RPM database in BerkeleyDB format. * The database is inspected as best-effort, returning all valid/readable entries. * @param data An RPM database in BerkeleyDB format. - * @deprecated Should use snyk/dep-graph. 
The response format is kept for backwards compatibility with snyk/kubernetes-monitor. */ -export async function getPackages(data: Buffer): Promise { - try { - const berkeleyDbValues = await bufferToHashDbValues(data); - - let packagesSkipped = 0; - let packagesProcessed = 0; - - const rpmPackageInfos = new Array(); - for (const entry of berkeleyDbValues) { - try { - const packageInfo = await bufferToPackageInfo(entry); - if (packageInfo !== undefined) { - rpmPackageInfos.push(packageInfo); - packagesProcessed += 1; - } else { - packagesSkipped += 1; - } - } catch (error) { - packagesSkipped += 1; - } - } +export async function getPackages(data: Buffer): Promise { + validateBerkeleyDbMetadata(data); + + const pageSize = data.readUInt32LE(20); + validatePageSize(pageSize); + + const berkeleyDbValues = await bufferToHashDbValues(data); + + let packagesSkipped = 0; - const formattedPackages = formatRpmPackages(rpmPackageInfos); - const response = formattedPackages.join('\n'); - - return { - response, - rpmMetadata: { - packagesProcessed, - packagesSkipped, - }, - }; - } catch (error) { - return { - response: '', - error, - }; + const rpmPackageInfos = new Array(); + for (const entry of berkeleyDbValues) { + const packageInfo = await bufferToPackageInfo(entry); + if (packageInfo !== undefined) { + rpmPackageInfos.push(packageInfo); + } else { + packagesSkipped += 1; + } } + + const formattedPackages = formatRpmPackages(rpmPackageInfos); + const response = formattedPackages.join('\n'); + + return { + response, + packagesSkipped, + packages: rpmPackageInfos, + }; } function formatRpmPackages(packages: PackageInfo[]): string[] { diff --git a/lib/rpm/extensions.ts b/lib/rpm/extensions.ts index 896dd0f..b345391 100644 --- a/lib/rpm/extensions.ts +++ b/lib/rpm/extensions.ts @@ -1,7 +1,7 @@ import { eventLoopSpinner } from 'event-loop-spinner'; import { IndexEntry, PackageInfo, RpmTag, RpmType } from './types'; -import { ParserError } from '../types'; +import { RpmParserError 
} from '../types'; /** * Iterate through RPM metadata entries to build the full package data. @@ -10,13 +10,17 @@ import { ParserError } from '../types'; export async function getPackageInfo( entries: IndexEntry[], ): Promise { + /** + * All of the entries in an RPM package are optional. + * We try to collect as much info as possible and finally we check if it's enough to construct a package. + */ const packageInfo: Partial = {}; for (const entry of entries) { switch (entry.info.tag) { case RpmTag.NAME: if (entry.info.type !== RpmType.STRING) { - throw new ParserError('Unexpected type for name tag', { + throw new RpmParserError('Unexpected type for name tag', { type: entry.info.type, }); } @@ -25,7 +29,7 @@ export async function getPackageInfo( case RpmTag.RELEASE: if (entry.info.type !== RpmType.STRING) { - throw new ParserError('Unexpected type for release tag', { + throw new RpmParserError('Unexpected type for release tag', { type: entry.info.type, }); } @@ -34,7 +38,7 @@ export async function getPackageInfo( case RpmTag.ARCH: if (entry.info.type !== RpmType.STRING) { - throw new ParserError('Unexpected type for arch tag', { + throw new RpmParserError('Unexpected type for arch tag', { type: entry.info.type, }); } @@ -43,7 +47,7 @@ export async function getPackageInfo( case RpmTag.EPOCH: if (entry.info.type !== RpmType.INT32) { - throw new ParserError('Unexpected type for epoch tag', { + throw new RpmParserError('Unexpected type for epoch tag', { type: entry.info.type, }); } @@ -52,7 +56,7 @@ export async function getPackageInfo( case RpmTag.SIZE: if (entry.info.type !== RpmType.INT32) { - throw new ParserError('Unexpected type for size tag', { + throw new RpmParserError('Unexpected type for size tag', { type: entry.info.type, }); } @@ -61,7 +65,7 @@ export async function getPackageInfo( case RpmTag.VERSION: if (entry.info.type !== RpmType.STRING) { - throw new ParserError('Unexpected type for version tag', { + throw new RpmParserError('Unexpected type for 
version tag', { type: entry.info.type, }); } diff --git a/lib/rpm/header.ts b/lib/rpm/header.ts index 917475a..0e3fb34 100644 --- a/lib/rpm/header.ts +++ b/lib/rpm/header.ts @@ -1,7 +1,7 @@ import { eventLoopSpinner } from 'event-loop-spinner'; import { IndexEntry, ENTRY_INFO_SIZE, EntryInfo } from './types'; -import { ParserError } from '../types'; +import { RpmParserError } from '../types'; /** * Transform a blob of metadadata into addressable RPM package entries. @@ -14,7 +14,7 @@ export async function headerImport(data: Buffer): Promise { if (indexLength <= 0 || indexLength > 50_000) { // Ensure we don't allocate something crazy... - throw new ParserError('Invalid index length', { indexLength }); + throw new RpmParserError('Invalid index length', { indexLength }); } const entryInfos = new Array(); diff --git a/lib/types.ts b/lib/types.ts index 1b78632..16c52f8 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -1,15 +1,34 @@ -export interface IParserResponse { +import { PackageInfo } from './rpm/types'; + +export interface RpmParserResponse { + /** + * Returns all packages detected in an RPM database file. + * Currently this is a flat list of dependencies and not a dependency graph. + */ + packages: PackageInfo[]; + + /** + * Returns the list of packages, each stored in a new line. Packages include the following attributes: + * name, version, epoch, size, architecture, and release. The attributes are delimited by a tab. + * @deprecated Use "packages" instead, which makes it easier to parse entries. + */ response: string; - rpmMetadata?: IRpmMetadata; - error?: ParserError; -} -export interface IRpmMetadata { - packagesProcessed: number; + /** + * Contains a count of the entries that could not be processed as RPM packages. + * Under normal circumstances this should be 0. However, if it does contain skipped packages + * then it may indicate: + * 1. A corrupt or bad RPM package/entry, or + * 2.
A limitation of the parser - possible reasons could be a new RPM version or a bug in the parsing logic. + */ packagesSkipped: number; } -export class ParserError extends Error { +/** + * Thrown by the parser whenever it encounters an error. + * Includes a "context" object to pass extra information about the error. + */ +export class RpmParserError extends Error { readonly context: unknown | undefined; constructor(message: string, context?: unknown) { diff --git a/test/index.test.ts b/test/index.test.ts index dfd9fac..9076536 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -35,18 +35,10 @@ describe('Testing various RPM databases', () => { const parserOutput = await getPackages(rpmDb); - expect(parserOutput.error).toBeUndefined(); - expect(parserOutput.rpmMetadata).toBeDefined(); - expect(parserOutput.rpmMetadata!.packagesSkipped).toEqual(0); - - const expectedEntries = expectedOutput - .trim() - .split('\n') - .sort(); - const parserEntries = parserOutput.response - .trim() - .split('\n') - .sort(); + expect(parserOutput.packagesSkipped).toEqual(0); + + const expectedEntries = expectedOutput.trim().split('\n').sort(); + const parserEntries = parserOutput.response.trim().split('\n').sort(); for (let j = 0; j < expectedEntries.length; j++) { const expectedEntry = expectedEntries[j];