diff --git a/lib/kms/Cache.js b/lib/kms/Cache.js new file mode 100644 index 0000000000..a12b329e6b --- /dev/null +++ b/lib/kms/Cache.js @@ -0,0 +1,56 @@ +class Cache { + constructor() { + this.lastChecked = null; + this.result = null; + } + + /** + * Retrieves the cached result. + * @returns {object|null} The cached result or null if not set. + */ + getResult() { + return this.result; + } + + /** + * Retrieves the last checked timestamp. + * @returns {number|null} The timestamp of the last check or null if never checked. + */ + getLastChecked() { + return this.lastChecked; + } + + /** + * Updates the cache with a new result and timestamp. + * @param {object} result - The result to cache. + * @returns {undefined} + */ + setResult(result) { + this.lastChecked = Date.now(); + this.result = result; + } + + /** + * Determines if the cache should be refreshed based on the last checked time. + * @param {number} duration - Duration in milliseconds for cache validity. + * @returns {boolean} true if the cache should be refreshed, else false. + */ + shouldRefresh(duration = 1 * 60 * 60 * 1000) { // Default: 1 hours + if (!this.lastChecked) return true; + const now = Date.now(); + const elapsed = now - this.lastChecked; + const jitter = Math.floor(Math.random() * 15 * 60 * 1000); // Up to 15 minutes + return elapsed > (duration - jitter); + } + + /** + * Clears the cache. + * @returns {undefined} + */ + clear() { + this.lastChecked = null; + this.result = null; + } +} + +module.exports = Cache; diff --git a/lib/kms/wrapper.js b/lib/kms/wrapper.js index a00e52edb8..547364c9e9 100644 --- a/lib/kms/wrapper.js +++ b/lib/kms/wrapper.js @@ -10,6 +10,8 @@ const KMIPClient = require('arsenal').network.kmipClient; const { KmsAWSClient } = require('arsenal').network; const Common = require('./common'); const vault = require('../auth/vault'); +const Cache = require('./Cache'); +const cache = new Cache(); let scalityKMS; let scalityKMSImpl; try { @@ -355,26 +357,49 @@ class KMS { } static checkHealth(log, cb) { + logger.debug('current KMS cache state', { lastChecked: cache.getLastChecked(), result: cache.getResult() }); if (!client.healthcheck) { return cb(null, { [implName]: { code: 200, message: 'OK' }, }); } - return client.healthcheck(log, err => { - const respBody = {}; - if (err) { - respBody[implName] = { - error: err.description, - code: err.code, - }; - } else { - respBody[implName] = { - code: 200, - message: 'OK', - }; - } - return cb(null, respBody); - }); + + const shouldRefreshCache = cache.shouldRefresh(); + + // healthcheck only if cache needs refreshing + if (shouldRefreshCache) { + logger.debug('health check for KMS backend'); + return client.healthcheck(log, (err) => { + const respBody = {}; + if (err) { + // Returning HTTP status code 200 allows CloudServer to start up and listen on port 8000 + // when KMS is unavailable, and ensures that the S3 frontend deep healthcheck + // still allow requests to this CloudServer instance. This way, an unavailable KMIP + // host or a connection issue with KMIP will not prevent the normal operation of CloudServer. + respBody[implName] = { + code: 200, + message: 'KMS health check failed', + description: err.description, + }; + logger.warn('KMS health check failed', { errorCode: err.code, error: err.description }); + } else { + respBody[implName] = { + code: 200, + message: 'OK', + }; + logger.info('KMS health check succeeded', { respBody }); + } + // Only update the cache on success + cache.setResult(respBody); + logger.debug('updated KMS cache:', { lastChecked: cache.getLastChecked(), result: cache.getResult() }); + return cb(null, respBody); + }); + } + + // Use cached healthcheck result if within a 1 hour window + const result = cache.getResult(); + logger.debug('using cached KMS health check', { cachedResult: result }); + return cb(null, result); } } diff --git a/lib/utilities/healthcheckHandler.js b/lib/utilities/healthcheckHandler.js index f8bd88b1e5..7b0192206a 100644 --- a/lib/utilities/healthcheckHandler.js +++ b/lib/utilities/healthcheckHandler.js @@ -4,6 +4,7 @@ const { data } = require('../data/wrapper'); const vault = require('../auth/vault'); const metadata = require('../metadata/wrapper'); const async = require('async'); +const kms = require('../kms/wrapper'); // current function utility is minimal, but will be expanded function isHealthy() { @@ -40,15 +41,11 @@ function writeResponse(res, error, log, results, cb) { * @return {undefined} */ function clientCheck(flightCheckOnStartUp, log, cb) { - // FIXME S3C-4833 KMS healthchecks have been disabled: - // - they should be reworked to avoid blocking all requests, - // including unencrypted requests - // - they should not prevent Cloudserver from starting up const clients = [ data, metadata, vault, - // kms, + kms, ]; const clientTasks = []; clients.forEach(client => { diff --git a/tests/unit/encryption/checkHealth.js b/tests/unit/encryption/checkHealth.js new file mode 100644 index 0000000000..14487b205e --- /dev/null +++ b/tests/unit/encryption/checkHealth.js @@ -0,0 +1,91 @@ +const assert = require('assert'); +const sinon = require('sinon'); +const { errors } = require('arsenal'); + +const Cache = require('../../../lib/kms/Cache'); +const { DummyRequestLogger } = require('../helpers'); +const memBackend = require('../../../lib/kms/in_memory/backend'); +const kms = require('../../../lib/kms/wrapper'); + +const log = new DummyRequestLogger(); + +describe('KMS.checkHealth', () => { + let getResultStub; + let setResultStub; + let shouldRefreshStub; + + beforeEach(() => { + getResultStub = sinon.stub(Cache.prototype, 'getResult').returns(null); + setResultStub = sinon.stub(Cache.prototype, 'setResult').returns(); + shouldRefreshStub = sinon.stub(Cache.prototype, 'shouldRefresh').returns(true); + + delete memBackend.healthcheck; + }); + + afterEach(() => { + sinon.restore(); + }); + + it('should return OK when kms backend does not have healthcheck method', (done) => { + kms.checkHealth(log, (err, result) => { + assert.strictEqual(err, null); + assert.deepStrictEqual(result, { + ['memoryKms']: { code: 200, message: 'OK' }, + }); + + done(); + }); + }); + + it('should return OK when healthcheck succeeds', (done) => { + memBackend.backend.healthcheck = sinon.stub().callsFake((log, cb) => cb(null)); + + kms.checkHealth(log, (err, result) => { + assert.strictEqual(err, null); + assert.deepStrictEqual(result, { + ['memoryKms']: { code: 200, message: 'OK' }, + }); + + // Verify that cache.setResult was called with the correct result + assert(setResultStub.calledOnceWithExactly(result)); + + done(); + }); + }); + + it('should return failure message when healthcheck fails', (done) => { + memBackend.backend.healthcheck = sinon.stub().callsFake((log, cb) => cb(errors.InternalError)); + + kms.checkHealth(log, (err, result) => { + assert.strictEqual(err, null); + assert.deepStrictEqual(result, { + ['memoryKms']: { + code: 200, + message: 'KMS health check failed', + description: 'We encountered an internal error. Please try again.', + }, + }); + + assert(setResultStub.calledOnceWithExactly(result)); + + done(); + }); + }); + + it('should use cached result when not refreshing', (done) => { + memBackend.backend.healthcheck = sinon.stub().callsFake((log, cb) => cb(null)); + shouldRefreshStub.returns(false); + + const cachedResult = { + ['memoryKms']: { code: 200, message: 'Cached OK' }, + }; + getResultStub.returns(cachedResult); + + kms.checkHealth(log, (err, result) => { + assert.strictEqual(err, null); + assert.deepStrictEqual(result, cachedResult); + + done(); + }); + }); +}); diff --git a/tests/unit/encryption/healthCheckCache.js b/tests/unit/encryption/healthCheckCache.js new file mode 100644 index 0000000000..8ec71e6d2a --- /dev/null +++ b/tests/unit/encryption/healthCheckCache.js @@ -0,0 +1,125 @@ +const assert = require('assert'); +const Cache = require('../../../lib/kms/Cache'); + +describe('Cache Class', () => { + let cache; + let originalDateNow; + let originalMathRandom; + + beforeEach(() => { + cache = new Cache(); + originalDateNow = Date.now; + originalMathRandom = Math.random; + }); + + afterEach(() => { + Date.now = originalDateNow; + Math.random = originalMathRandom; + }); + + describe('getResult()', () => { + it('should return null when no result is set', () => { + assert.strictEqual(cache.getResult(), null); + }); + + it('should return the cached result after setResult is called', () => { + const result = { data: 'test' }; + cache.setResult(result); + assert.deepStrictEqual(cache.getResult(), result); + }); + }); + + describe('getLastChecked()', () => { + it('should return null when cache has never been set', () => { + assert.strictEqual(cache.getLastChecked(), null); + }); + + it('should return the timestamp after setResult is called', () => { + const fakeTimestamp = 1625077800000; // Example timestamp + Date.now = () => fakeTimestamp; // Mock Date.now + cache.setResult({ data: 'test' }); + assert.strictEqual(cache.getLastChecked(), fakeTimestamp); + }); + }); + + describe('setResult()', () => { + it('should set the result and update lastChecked', () => { + const fakeTimestamp = 1625077800000; + Date.now = () => fakeTimestamp; // Mock Date.now + const result = { data: 'test' }; + cache.setResult(result); + assert.deepStrictEqual(cache.getResult(), result); + assert.strictEqual(cache.getLastChecked(), fakeTimestamp); + }); + }); + + describe('shouldRefresh()', () => { + it('should return true if cache has never been set', () => { + assert.strictEqual(cache.shouldRefresh(), true); + }); + + it('should return false if elapsed time is less than duration minus maximum jitter', () => { + const fakeNow = 1625077800000; + // Elapsed time = 1 hour - 15 minutes = 45 minutes + const fakeLastChecked = fakeNow - (45 * 60 * 1000); // 45 minutes ago + Date.now = () => fakeNow; + cache.lastChecked = fakeLastChecked; + + // Mock Math.random to return 0 (jitter = 0) + Math.random = () => 0; + + // elapsed = 45 minutes, duration - jitter = 60 minutes + // 45 < 60 => shouldRefresh = false + assert.strictEqual(cache.shouldRefresh(), false); + }); + + it('should return true if elapsed time is greater than duration minus maximum jitter', () => { + const fakeNow = 1625077800000; + // Elapsed time = 1 hour + 1 minute = 61 minutes + const fakeLastChecked = fakeNow - (61 * 60 * 1000); // 61 minutes ago + Date.now = () => fakeNow; + cache.lastChecked = fakeLastChecked; + + // Mock Math.random to return 0 (jitter = 0) + Math.random = () => 0; + + // elapsed = 61 minutes, duration - jitter = 60 minutes + // 61 > 60 => shouldRefresh = true + assert.strictEqual(cache.shouldRefresh(), true); + }); + + it('should use custom duration if provided', () => { + const customDuration = 6 * 60 * 60 * 1000; // 6 hours in milliseconds + const fakeNow = 1625077800000; + Date.now = () => fakeNow; + + // Elapsed time = 5 hours + const fakeLastChecked1 = fakeNow - (5 * 60 * 60 * 1000); + cache.lastChecked = fakeLastChecked1; + + // Mock Math.random to return 0 (jitter = 0) + Math.random = () => 0; + + // 5 hours < 6 hours => shouldRefresh = false + assert.strictEqual(cache.shouldRefresh(customDuration), false, + 'Cache should not refresh within custom duration'); + + // Elapsed time = 7 hours + const fakeLastChecked2 = fakeNow - (7 * 60 * 60 * 1000); + cache.lastChecked = fakeLastChecked2; + + // 7 hours > 6 hours => shouldRefresh = true + assert.strictEqual(cache.shouldRefresh(customDuration), true, + 'Cache should refresh after custom duration'); + }); + }); + + describe('clear()', () => { + it('should reset lastChecked and result to null', () => { + cache.setResult({ data: 'test' }); + cache.clear(); + assert.strictEqual(cache.getResult(), null); + assert.strictEqual(cache.getLastChecked(), null); + }); + }); +});