diff --git a/build.sbt b/build.sbt
index 19b3005bad..1f6229c22d 100644
--- a/build.sbt
+++ b/build.sbt
@@ -267,6 +267,9 @@ val resourceSettings = Seq(
   // conf directory.
   (Compile / PlayKeys.playExternalizedResources) += file("modules/xquery/src/main/resources/xtra.xqm") -> "xtra.xqm",
 
+  // Prevents websockets from being closed by the dev-mode server
+  PlayKeys.devSettings += "play.server.websocket.periodic-keep-alive-mode" -> "pong",
+
   // Filter out excluded resources from packaging
   Universal / mappings := (Universal / mappings).value.filterNot { case (f, s) =>
     excludedResources contains f.getName
diff --git a/conf/application.conf b/conf/application.conf
index 96ed0a8029..5267dd4bfc 100644
--- a/conf/application.conf
+++ b/conf/application.conf
@@ -424,6 +424,13 @@ ehri {
     # to avoid exceeding backend limits
     maxDeletions: 1000
   }
+
+  auditor {
+    maxResults: 1000
+    # Limited by the hard-coded maximum number of search results
+    # per page; do not increase this value
+    batchSize: 100
+  }
 }
 
 # SKOS/RDF export config. This sets the default baseURI for
@@ -438,6 +445,11 @@ ehri {
 
 # Ensure indexer does not idle timeout
 akka.http.host-connection-pool.client.idle-timeout = 5 minutes
+akka.http.client.idle-timeout = 5 minutes
+akka.http.server.idle-timeout = 5 minutes
+
+# Ensure websocket connections do not idle timeout
+play.server.websocket.periodic-keep-alive-max-idle = 10 seconds
 
 # system-specific overrides and extensions
diff --git a/conf/logback-play-dev.xml b/conf/logback-play-dev.xml
index 4e271d83fc..d359ef280e 100644
--- a/conf/logback-play-dev.xml
+++ b/conf/logback-play-dev.xml
@@ -70,6 +70,9 @@
+
+
+
diff --git a/modules/admin/app/actors/datamodel/Auditor.scala b/modules/admin/app/actors/datamodel/Auditor.scala
new file mode 100644
index 0000000000..1a275597bc
--- /dev/null
+++ b/modules/admin/app/actors/datamodel/Auditor.scala
@@ -0,0 +1,94 @@
+package actors.datamodel
+
+import actors.LongRunningJob.Cancel
+import actors.datamodel.AuditorManager.{AuditTask, AuditorJob}
+import akka.actor.{Actor, ActorLogging, ActorRef}
+import akka.pattern._
+import models._
+import services.data.DataUser
+import services.search._
+import utils.PageParams
+
+import java.time.{Duration, Instant}
+import scala.concurrent.ExecutionContext
+
+object Auditor {
+  sealed trait Action
+
+  case class RunAudit(job: AuditorJob, query: Option[SearchQuery] = None) extends Action
+
+  case class Check(id: String, errors: Seq[ValidationError])
+
+  case class Checked(checked: Int) extends Action
+
+  case class Completed(checked: Int, count: Int, secs: Long = 0) extends Action
+
+  case class Cancelled(checked: Int, count: Int, secs: Long = 0) extends Action
+
+  case class CheckBatch(checks: Seq[Check], more: Boolean)
+
+  private case class Batch(job: AuditorJob, items: Seq[Entity], query: SearchQuery, more: Boolean) extends Action
+}
+
+
+case class Auditor(searchEngine: SearchEngine, resolver: SearchItemResolver, fieldMetadataSet: FieldMetadataSet, batchSize: Int, maxFlagged: Int)(
+  implicit userOpt: Option[UserProfile], ec: ExecutionContext) extends Actor with ActorLogging {
+  import Auditor._
+  private implicit val apiUser: DataUser = DataUser(userOpt.map(_.id))
+
+  override def receive: Receive = {
+    case e: RunAudit =>
+      context.become(running(sender(), Instant.now(), 0, 0))
+      self ! e
+  }
+
+  def running(msgTo: ActorRef, start: Instant, checked: Int, flagged: Int): Receive = {
+    case RunAudit(job, queryOpt) =>
+      // Search for entities to audit
+      val query: SearchQuery = queryOpt.getOrElse(initSearch(job.task))
+
+      searchEngine.search(query).flatMap { res =>
+        resolver.resolve[Entity](res.page.items).map { list =>
+          Batch(job, list.flatten, query, res.page.hasMore)
+        }
+      }.pipeTo(self)
+
+    case Batch(job, items, query, more) =>
+      log.debug(s"Found ${items.size} items for audit, total so far: $checked")
+      val errors: Seq[Check] = items.flatMap { item =>
+        val errs = fieldMetadataSet.validate(item)
+        val mandatoryErrs = errs.collect { case e @ MissingMandatoryField(_) => e }
+        if (job.task.mandatoryOnly && mandatoryErrs.isEmpty) None
+        else if (errs.nonEmpty) Some(Check(item.id, errs))
+        else None
+      }
+
+      if (checked % batchSize == 0) {
+        msgTo ! Checked(checked)
+      }
+
+      msgTo ! CheckBatch(errors, more)
+
+      if (more && (flagged + errors.size) < maxFlagged) {
+        val next = query.copy(paging = query.paging.next)
+        context.become(running(msgTo, start, checked + items.size, flagged + errors.size))
+        self ! RunAudit(job, Some(next))
+      } else {
+        msgTo ! Completed(checked + items.size, flagged + errors.size, time(start))
+      }
+
+    case Cancel =>
+      log.debug(s"Cancelling audit job, checked so far: $checked, flagged: $flagged")
+      sender() ! Cancelled(checked, flagged)
+  }
+
+  private def time(from: Instant): Long = Duration.between(from, Instant.now()).toMillis / 1000
+
+  private def initSearch(task: AuditTask): SearchQuery = {
+    val paging: PageParams = PageParams(limit = batchSize)
+    val params: SearchParams = SearchParams(entities = Seq(task.entityType), query = task.idPrefix.map(p => s"$p*"), sort = Some(SearchSort.Id))
+    val cpFacet = AppliedFacet(SearchConstants.CREATION_PROCESS, Seq(Description.CreationProcess.Manual.toString))
+    val facets = List(FieldFacetClass(key = SearchConstants.CREATION_PROCESS, name = "Creation Process", param = "creation"))
+    SearchQuery(params = params, paging = paging, appliedFacets = Seq(cpFacet), user = userOpt, facetClasses = facets)
+  }
+}
diff --git a/modules/admin/app/actors/datamodel/AuditorManager.scala b/modules/admin/app/actors/datamodel/AuditorManager.scala
new file mode 100644
index 0000000000..d3c8d1cb07
--- /dev/null
+++ b/modules/admin/app/actors/datamodel/AuditorManager.scala
@@ -0,0 +1,116 @@
+package actors.datamodel
+
+import actors.LongRunningJob.Cancel
+import actors.datamodel.Auditor.{Cancelled, CheckBatch, Checked, Completed, RunAudit}
+import actors.datamodel.AuditorManager.{AuditorJob, ItemResult}
+import akka.actor.SupervisorStrategy.Stop
+import akka.actor.{Actor, ActorLogging, ActorRef, OneForOneStrategy, Props, SupervisorStrategy, Terminated}
+import models._
+import play.api.Configuration
+import play.api.i18n.Messages
+import play.api.libs.json.{Format, Json}
+import services.search.{SearchEngine, SearchItemResolver}
+import utils.WebsocketConstants
+
+import scala.concurrent.ExecutionContext
+import scala.concurrent.duration.DurationInt
+
+object AuditorManager {
+
+  case class AuditTask(entityType: EntityType.Value, idPrefix: Option[String] = None, mandatoryOnly: Boolean)
+  object AuditTask {
+    implicit val _format: Format[AuditTask] = Json.format[AuditTask]
+  }
+
+  case class AuditorJob(jobId: String, task: AuditTask)
+
+  case class ItemResult(id: String, mandatory: Seq[String], desirable: Seq[String])
+  object ItemResult {
+    implicit val _format: Format[ItemResult] = Json.format[ItemResult]
+  }
+}
+
+case class AuditorManager(job: AuditorJob, searchEngine: SearchEngine, searchItemResolver: SearchItemResolver, fieldMetadataSet: FieldMetadataSet)(
+  implicit userOpt: Option[UserProfile], messages: Messages, ec: ExecutionContext, config: Configuration) extends Actor with ActorLogging {
+
+  override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() {
+    case e =>
+      self ! e
+      Stop
+  }
+
+  // Ready state: we've received a job but won't actually start
+  // until there is a channel to talk through
+  override def receive: Receive = {
+    case chan: ActorRef =>
+      log.debug("Received initial subscriber, starting...")
+      val maxResults = config.get[Int]("ehri.admin.auditor.maxResults")
+      val batchSize = config.get[Int]("ehri.admin.auditor.batchSize")
+      val runner = context.actorOf(Props(Auditor(searchEngine, searchItemResolver, fieldMetadataSet, batchSize, maxResults)))
+      context.become(running(runner, Set(chan)))
+      runner ! RunAudit(job, None)
+  }
+
+  /**
+    * Running state.
+    *
+    * @param runner the audit runner actor
+    * @param subs   a set of subscribers to message w/ updates
+    */
+  def running(runner: ActorRef, subs: Set[ActorRef]): Receive = {
+
+    // Add a new message subscriber
+    case chan: ActorRef =>
+      log.debug(s"Added new message subscriber (now ${subs.size + 1})")
+      context.watch(chan)
+      context.become(running(runner, subs + chan))
+
+    case Terminated(actor) if actor == runner =>
+      log.debug(s"Actor terminated: $actor")
+      context.system.scheduler.scheduleOnce(5.seconds, self,
+        "Audit runner unexpectedly shut down")
+
+    // Remove terminated subscribers
+    case Terminated(chan) =>
+      log.debug(s"Removing subscriber: $chan")
+      context.unwatch(chan)
+      context.become(running(runner, subs - chan))
+
+    // A batch of items has been checked
+    case CheckBatch(checks, _) =>
+      val res: Seq[ItemResult] = checks.filter(_.errors.nonEmpty).map { check =>
+        val mandatory = check.errors.collect { case e: MissingMandatoryField => e }
+        val desirable = check.errors.collect { case e: MissingDesirableField => e }
+        ItemResult(check.id, mandatory.map(_.id), desirable.map(_.id))
+      }
+      msg(Json.stringify(Json.toJson(res)), subs)
+
+    // Received confirmation that the runner has shut down
+    case Cancelled(checked, _, secs) =>
+      msg(Messages("dataModel.audit.cancelled", WebsocketConstants.DONE_MESSAGE, checked, secs), subs)
+      context.stop(self)
+
+    // The runner has completed, so we log the
+    // event and shut down too
+    case Completed(checked, flagged, secs) =>
+      msg(Messages("dataModel.audit.completed", WebsocketConstants.DONE_MESSAGE, checked, flagged, secs), subs)
+      context.stop(self)
+
+    case Checked(checked) =>
+      msg(Messages("dataModel.audit.checked", WebsocketConstants.INFO_MESSAGE, checked), subs)
+
+    // Cancel the audit... here we tell the runner to exit
+    // and shut down on its termination signal...
+    case Cancel =>
+      log.info("Cancelling audit...")
+      runner ! Cancel
+
+    case m =>
+      log.error(s"Unexpected message: $m")
+  }
+
+  private def msg(s: String, subs: Set[ActorRef]): Unit = {
+    subs.foreach(_ ! s)
+  }
+}
diff --git a/modules/admin/app/assets/css/dmeditor.scss b/modules/admin/app/assets/css/dmeditor.scss
index 5614af5b12..fc880890f4 100644
--- a/modules/admin/app/assets/css/dmeditor.scss
+++ b/modules/admin/app/assets/css/dmeditor.scss
@@ -46,3 +46,13 @@ $active-table-row: #e7f1ff;
   margin-right: auto;
 }
 
+#auditor-suggestions-container {
+  @extend %expanding-column;
+  @extend %overflow-contents;
+  table-layout: fixed;
+  margin-bottom: $margin-md;
+}
+
+.auditor-options {
+  margin-bottom: $margin-md;
+}
diff --git a/modules/admin/app/assets/js/datasets/api.ts b/modules/admin/app/assets/js/datasets/api.ts
index d4efdc68ba..d2ff40ec41 100644
--- a/modules/admin/app/assets/js/datasets/api.ts
+++ b/modules/admin/app/assets/js/datasets/api.ts
@@ -40,23 +40,23 @@ export class DatasetManagerApi {
   }
 
   cancel(jobId: string): Promise<{ok: boolean}> {
-    return apiCall(this.service.LongRunningJobs.cancel(this.repoId, jobId));
+    return apiCall(this.service.admin.Tasks.cancel(jobId));
   }
 
   listFiles(ds: string, stage: string, prefix: string, after?: string): Promise {
-    return apiCall(this.service.ImportFiles.listFiles(this.repoId, ds, stage, prefix, after));
+    return apiCall(this.service.datasets.ImportFiles.listFiles(this.repoId, ds, stage, prefix, after));
   }
 
   getImportConfig(ds: string): Promise {
-    return apiCall(this.service.ImportConfigs.get(this.repoId, ds));
+    return apiCall(this.service.datasets.ImportConfigs.get(this.repoId, ds));
   }
 
   saveImportConfig(ds: string, config: ImportConfig): Promise {
-    return apiCall(this.service.ImportConfigs.save(this.repoId, ds), config);
+    return apiCall(this.service.datasets.ImportConfigs.save(this.repoId, ds), config);
   }
 
   deleteImportConfig(ds: string): Promise {
-    return apiCall(this.service.ImportConfigs.delete(this.repoId, ds));
+    return apiCall(this.service.datasets.ImportConfigs.delete(this.repoId, ds));
   }
 
   ingestFiles(ds: string, paths: string[], opts: ImportConfig, commit: boolean): Promise {
@@ -65,31 +65,31 @@
       commit: commit,
       files: paths,
     };
-    return apiCall(this.service.ImportConfigs.ingestFiles(this.repoId, ds), data);
+    return apiCall(this.service.datasets.ImportConfigs.ingestFiles(this.repoId, ds), data);
   }
 
   deleteFiles(ds: string, stage: string, paths: string[]): Promise<{deleted: number}> {
-    return apiCall(this.service.ImportFiles.deleteFiles(this.repoId, ds, stage), paths);
+    return apiCall(this.service.datasets.ImportFiles.deleteFiles(this.repoId, ds, stage), paths);
   }
 
   validateFiles(ds: string, stage: string, tagToPath: object): Promise {
-    return apiCall(this.service.ImportFiles.validateFiles(this.repoId, ds, stage), tagToPath);
+    return apiCall(this.service.datasets.ImportFiles.validateFiles(this.repoId, ds, stage), tagToPath);
   }
 
   info(ds: string, stage: string, key: string, versionId?: string): Promise {
-    return apiCall(this.service.ImportFiles.info(this.repoId, ds, stage, key, versionId));
+    return apiCall(this.service.datasets.ImportFiles.info(this.repoId, ds, stage, key, versionId));
   }
 
   fileUrls(ds: string, stage: string, paths: string[]): Promise {
-    return apiCall(this.service.ImportFiles.fileUrls(this.repoId, ds, stage), paths);
+    return apiCall(this.service.datasets.ImportFiles.fileUrls(this.repoId, ds, stage), paths);
   }
 
   copyFile(ds: string, stage: string, key: string, toDs: string, toName?: string, versionId?: string): Promise {
-    return apiCall(this.service.ImportFiles.copyFile(this.repoId, ds, stage, key, toDs, toName, versionId));
+    return apiCall(this.service.datasets.ImportFiles.copyFile(this.repoId, ds, stage, key, toDs, toName, versionId));
   }
 
   uploadHandle(ds: string, stage: string, fileSpec: FileToUpload): Promise<{presignedUrl: string}> {
-    return apiCall(this.service.ImportFiles.uploadHandle(this.repoId, ds, stage), fileSpec);
+    return apiCall(this.service.datasets.ImportFiles.uploadHandle(this.repoId, ds, stage), fileSpec);
   }
 
   uploadFile(url: string, file: File, progressHandler: Function) {
@@ -121,154 +121,154 @@
   }
 
   harvest(ds: string, config: HarvestConfig, fromLast?: boolean): Promise {
-    return apiCall(this.service.HarvestConfigs.harvest(this.repoId, ds, fromLast), config);
+    return apiCall(this.service.datasets.HarvestConfigs.harvest(this.repoId, ds, fromLast), config);
   }
 
   getHarvestConfig(ds: string): Promise {
-    return apiCall(this.service.HarvestConfigs.get(this.repoId, ds));
+    return apiCall(this.service.datasets.HarvestConfigs.get(this.repoId, ds));
   }
 
   saveHarvestConfig(ds: string, config: HarvestConfig): Promise {
-    return apiCall(this.service.HarvestConfigs.save(this.repoId, ds), config);
+    return apiCall(this.service.datasets.HarvestConfigs.save(this.repoId, ds), config);
   }
 
   deleteHarvestConfig(ds: string): Promise {
-    return apiCall(this.service.HarvestConfigs.delete(this.repoId, ds));
+    return apiCall(this.service.datasets.HarvestConfigs.delete(this.repoId, ds));
   }
 
   testHarvestConfig(ds: string, config: HarvestConfig): Promise<{ok: true}> {
-    return apiCall(this.service.HarvestConfigs.test(this.repoId, ds), config);
+    return apiCall(this.service.datasets.HarvestConfigs.test(this.repoId, ds), config);
   }
 
   cleanHarvestConfig(ds: string, config: HarvestConfig): Promise {
-    return apiCall(this.service.HarvestConfigs.clean(this.repoId, ds), config);
+    return apiCall(this.service.datasets.HarvestConfigs.clean(this.repoId, ds), config);
   }
 
   convert(ds: string, key: string|null, config: ConvertConfig): Promise {
-    return apiCall(this.service.ConvertConfigs.convert(this.repoId, ds, key), config);
+    return apiCall(this.service.datasets.ConvertConfigs.convert(this.repoId, ds, key), config);
   }
 
   convertFileUrl(ds: string, stage: string, key: string) {
-    return this.service.ConvertConfigs.convertFile(this.repoId, ds, stage, key).url;
+    return this.service.datasets.ConvertConfigs.convertFile(this.repoId, ds, stage, key).url;
   }
 
   getConvertConfig(ds: string): Promise<[string, object][]> {
-    return apiCall(this.service.ConvertConfigs.get(this.repoId, ds));
+    return apiCall(this.service.datasets.ConvertConfigs.get(this.repoId, ds));
   }
 
   saveConvertConfig(ds: string, dtIds: [string, object][]): Promise<{ok: true}> {
-    return apiCall(this.service.ConvertConfigs.save(this.repoId, ds), dtIds);
+    return apiCall(this.service.datasets.ConvertConfigs.save(this.repoId, ds), dtIds);
   }
 
   listDataTransformations(): Promise {
-    return apiCall(this.service.DataTransformations.list(this.repoId));
+    return apiCall(this.service.datasets.DataTransformations.list(this.repoId));
   }
 
   getDataTransformation(id: string): Promise {
-    return apiCall(this.service.DataTransformations.get(this.repoId, id));
+    return apiCall(this.service.datasets.DataTransformations.get(this.repoId, id));
   }
 
   createDataTransformation(generic: boolean, data: DataTransformationInfo): Promise {
-    return apiCall(this.service.DataTransformations.create(this.repoId, generic), data);
+    return apiCall(this.service.datasets.DataTransformations.create(this.repoId, generic), data);
   }
 
   updateDataTransformation(id: string, generic: boolean, data: DataTransformationInfo): Promise {
-    return apiCall(this.service.DataTransformations.update(this.repoId, id, generic), data);
+    return apiCall(this.service.datasets.DataTransformations.update(this.repoId, id, generic), data);
   }
 
   deleteDataTransformation(id: string): Promise {
-    return apiCall(this.service.DataTransformations.delete(this.repoId, id));
+    return apiCall(this.service.datasets.DataTransformations.delete(this.repoId, id));
   }
 
   listDatasets(): Promise {
-    return apiCall(this.service.ImportDatasets.list(this.repoId));
+    return apiCall(this.service.datasets.ImportDatasets.list(this.repoId));
   }
 
   listAllDatasets(): Promise {
-    return apiCall(this.service.ImportDatasets.listAll());
+    return apiCall(this.service.datasets.ImportDatasets.listAll());
   }
 
   datasetStats(): Promise> {
-    return apiCall(this.service.ImportDatasets.stats(this.repoId));
+    return apiCall(this.service.datasets.ImportDatasets.stats(this.repoId));
   }
 
   createDataset(info: ImportDatasetInfo): Promise {
-    return apiCall(this.service.ImportDatasets.create(this.repoId), info);
+    return apiCall(this.service.datasets.ImportDatasets.create(this.repoId), info);
   }
 
   updateDataset(ds: string, info: ImportDatasetInfo): Promise {
-    return apiCall(this.service.ImportDatasets.update(this.repoId, ds), info);
+    return apiCall(this.service.datasets.ImportDatasets.update(this.repoId, ds), info);
  }
 
   importDatasets(ds: string, info: ImportDatasetInfo[]): Promise<{ok: true}> {
-    return apiCall(this.service.ImportDatasets.batch(this.repoId), info);
+    return apiCall(this.service.datasets.ImportDatasets.batch(this.repoId), info);
   }
 
   fileCount(ds: string): Promise {
-    return apiCall(this.service.ImportDatasets.fileCount(this.repoId, ds));
+    return apiCall(this.service.datasets.ImportDatasets.fileCount(this.repoId, ds));
   }
 
   deleteDataset(ds: string): Promise {
-    return apiCall(this.service.ImportDatasets.delete(this.repoId, ds));
+    return apiCall(this.service.datasets.ImportDatasets.delete(this.repoId, ds));
   }
 
   datasetErrors(ds: string): Promise {
-    return apiCall(this.service.ImportDatasets.errors(this.repoId, ds));
+    return apiCall(this.service.datasets.ImportDatasets.errors(this.repoId, ds));
   }
 
   listSnapshots(): Promise {
-    return apiCall(this.service.ImportLogs.listSnapshots(this.repoId));
+    return apiCall(this.service.datasets.ImportLogs.listSnapshots(this.repoId));
   }
 
   takeSnapshot(info: SnapshotInfo): Promise {
-    return apiCall(this.service.ImportLogs.takeSnapshot(this.repoId), info);
+    return apiCall(this.service.datasets.ImportLogs.takeSnapshot(this.repoId), info);
   }
 
   diffSnapshot(snId: number): Promise<[string, string][]> {
-    return apiCall(this.service.ImportLogs.diffSnapshot(this.repoId, snId));
+    return apiCall(this.service.datasets.ImportLogs.diffSnapshot(this.repoId, snId));
   }
 
   cleanup(snId: number): Promise {
-    return apiCall(this.service.ImportLogs.cleanup(this.repoId, snId));
+    return apiCall(this.service.datasets.ImportLogs.cleanup(this.repoId, snId));
   }
 
   listCleanups(snId: number): Promise<[number, string][]> {
-    return apiCall(this.service.ImportLogs.listCleanups(this.repoId, snId));
+    return apiCall(this.service.datasets.ImportLogs.listCleanups(this.repoId, snId));
   }
 
   getCleanup(snId: number, cleanupId: number): Promise {
-    return apiCall(this.service.ImportLogs.getCleanup(this.repoId, snId, cleanupId));
+    return apiCall(this.service.datasets.ImportLogs.getCleanup(this.repoId, snId, cleanupId));
   }
 
   doCleanup(snId: number, confirm: object): Promise {
-    return apiCall(this.service.ImportLogs.doCleanup(this.repoId, snId), confirm);
+    return apiCall(this.service.datasets.ImportLogs.doCleanup(this.repoId, snId), confirm);
   }
 
   doCleanupAsync(snId: number, confirm: object): Promise {
-    return apiCall(this.service.ImportLogs.doCleanupAsync(this.repoId, snId), confirm);
+    return apiCall(this.service.datasets.ImportLogs.doCleanupAsync(this.repoId, snId), confirm);
   }
 
   getCoreferences(): Promise {
-    return apiCall(this.service.CoreferenceTables.getTable(this.repoId));
+    return apiCall(this.service.datasets.CoreferenceTables.getTable(this.repoId));
   }
 
   importCoreferences(coreferences: Coreference[]): Promise<{imported: number}> {
-    return apiCall(this.service.CoreferenceTables.importTable(this.repoId), coreferences);
+    return apiCall(this.service.datasets.CoreferenceTables.importTable(this.repoId), coreferences);
   }
 
   extractCoreferences(): Promise<{imported: number}> {
-    return apiCall(this.service.CoreferenceTables.extractTable(this.repoId));
+    return apiCall(this.service.datasets.CoreferenceTables.extractTable(this.repoId));
   }
 
   applyCoreferences(): Promise {
-    return apiCall(this.service.CoreferenceTables.applyTable(this.repoId));
+    return apiCall(this.service.datasets.CoreferenceTables.applyTable(this.repoId));
   }
 
   deleteCoreferences(coreferences: Coreference[]): Promise<{deleted: number}> {
-    return apiCall(this.service.CoreferenceTables.deleteTable(this.repoId), coreferences);
+    return apiCall(this.service.datasets.CoreferenceTables.deleteTable(this.repoId), coreferences);
   }
 
   logs(dsId?: string): Promise {
-    return apiCall(this.service.ImportLogs.list(this.repoId, dsId));
+    return apiCall(this.service.datasets.ImportLogs.list(this.repoId, dsId));
   }
 }
diff --git a/modules/admin/app/assets/js/datasets/components/_dashboard.vue b/modules/admin/app/assets/js/datasets/components/_dashboard.vue
index 4873421299..e26c8612aa 100644
--- a/modules/admin/app/assets/js/datasets/components/_dashboard.vue
+++ b/modules/admin/app/assets/js/datasets/components/_dashboard.vue
@@ -26,6 +26,7 @@ export default {
       this.datasetInfo = await this.api.listAllDatasets()
       this.loaded = true;
     } catch (e) {
+      console.log(e);
       this.showError("Error loading dataset info", e);
     } finally {
       this.loading = true;
diff --git a/modules/admin/app/assets/js/datasets/components/_filter-control.vue b/modules/admin/app/assets/js/datasets/components/_filter-control.vue
index 7bbb0c711e..5c94d44d8d 100644
--- a/modules/admin/app/assets/js/datasets/components/_filter-control.vue
+++ b/modules/admin/app/assets/js/datasets/components/_filter-control.vue
@@ -1,7 +1,7 @@
diff --git a/modules/admin/app/assets/js/datasets/components/_mixin-preview-panel.vue b/modules/admin/app/assets/js/datasets/components/_mixin-preview-panel.vue
index e0686cac5c..5d0c86aaae 100644
--- a/modules/admin/app/assets/js/datasets/components/_mixin-preview-panel.vue
+++ b/modules/admin/app/assets/js/datasets/components/_mixin-preview-panel.vue
@@ -239,7 +239,7 @@ export default {
     },
   },
   created: function () {
-    this.worker = new Worker(this.config.previewLoader);
+    this.worker = new Worker(this.config.websocketHandler);
     this.worker.onmessage = this.receiveMessage;
   },
   mounted: function () {
diff --git a/modules/admin/app/assets/js/datasets/components/_mixin-tasklog.vue b/modules/admin/app/assets/js/datasets/components/_mixin-tasklog.vue
index 43799e5f23..b1131c97e0 100644
--- a/modules/admin/app/assets/js/datasets/components/_mixin-tasklog.vue
+++ b/modules/admin/app/assets/js/datasets/components/_mixin-tasklog.vue
@@ -6,6 +6,9 @@
 import _startsWith from 'lodash/startsWith';
 import {Terminal} from "xterm";
 import termopts from "../termopts";
+interface Api {
+  cancel: (jobId: string) => Promise;
+}
 
 let initialLogState = function (): object {
   return {
@@ -19,7 +22,7 @@
 export default {
   props: {
-    api: DatasetManagerApi,
+    api: Object as import("vue").PropType<Api>,
     config: Object,
   },
   data: function (): object {
@@ -40,11 +43,10 @@
       this.overwrite = _startsWith(line, this.logDeleteLinePrefix);
     },
 
-    monitor: async function (url: string, jobId: string, onMsg: (s: string) => any = function () {
-    }, clear: boolean = false) {
+    monitor: async function (url: string, jobId: string, onMsg: (s: string) => any = function () {}, clear: boolean = false) {
       this.jobId = jobId;
       return await new Promise(((resolve) => {
-        let worker = new Worker(this.config.previewLoader);
+        let worker = new Worker(this.config.websocketHandler);
         worker.onmessage = msg => {
           if (msg.data.error) {
             this.println(msg.data.error);
diff --git a/modules/admin/app/assets/js/datasets/dashboard-api.ts b/modules/admin/app/assets/js/datasets/dashboard-api.ts
index 89395eee72..a496a8a3dc 100644
--- a/modules/admin/app/assets/js/datasets/dashboard-api.ts
+++ b/modules/admin/app/assets/js/datasets/dashboard-api.ts
@@ -13,10 +13,10 @@ export class DashboardApi {
   }
 
   managerUrl(repoId: string, ds?: string): string {
-    return this.service.ImportDatasets.manager(repoId, ds).url;
+    return this.service.datasets.ImportDatasets.manager(repoId, ds).url;
   }
 
   listAllDatasets(): Promise {
-    return apiCall(this.service.ImportDatasets.listAll());
+    return apiCall(this.service.datasets.ImportDatasets.listAll());
   }
 }
diff --git a/modules/admin/app/assets/js/datasets/types.ts b/modules/admin/app/assets/js/datasets/types.ts
index 2de5f47fac..fd5bcbcfa6 100644
--- a/modules/admin/app/assets/js/datasets/types.ts
+++ b/modules/admin/app/assets/js/datasets/types.ts
@@ -8,7 +8,7 @@ export type ConfigType = {
   input: string,
   output: string,
   config: string,
-  previewLoader: string,
+  websocketHandler: string,
   defaultTab: string,
   monitorUrl: (jobId: string) => string,
   maxPreviewSize: number,
diff --git a/modules/admin/app/assets/js/dmeditor/api.ts b/modules/admin/app/assets/js/dmeditor/api.ts
index 9c2bfc3179..506c7361ce 100644
--- a/modules/admin/app/assets/js/dmeditor/api.ts
+++ b/modules/admin/app/assets/js/dmeditor/api.ts
@@ -31,6 +31,7 @@
       withCredentials: true,
     }).then(r => r.data);
   }
+
   list(): Promise> {
     return EntityTypeMetadataApi.call>(this.service.list(), {});
   }
diff --git a/modules/admin/app/assets/js/dmeditor/auditor-api.ts b/modules/admin/app/assets/js/dmeditor/auditor-api.ts
new file mode 100644
index 0000000000..68bd055197
--- /dev/null
+++ b/modules/admin/app/assets/js/dmeditor/auditor-api.ts
@@ -0,0 +1,61 @@
+import {apiCall} from "../datasets/common";
+import {EntityType, EntityTypeMetadata, FieldMetadataTemplates} from "./types";
+
+export interface Service {
+  templates(): {url: string, method: any};
+  list(): {url: string, method: any};
+  getItemType(et: EntityType, id: string): {url: string, method: any};
+  runAudit(): {url: string, method: any};
+  cancel(jobId: string): {url: string, method: any};
+}
+
+export interface JobInfo {
+  jobId: string;
+  cancelUrl: string;
+  url: string;
+}
+
+/**
+ * A data access object encapsulating the management API endpoints.
+ */
+export class AuditorApi {
+  service: Service;
+
+  constructor(service: Service) {
+    this.service = service;
+  }
+
+  /**
+   * Return a list of entity types for which we have audit metadata.
+   */
+  async types(): Promise {
+    let list = await apiCall>(this.service.list());
+    let templates = await apiCall(this.service.templates());
+    return Object.keys(templates).filter(et => list[et] && Object.keys(list[et]).length > 0);
+  }
+
+  /**
+   * Return the admin URL for a specific entity type.
+   * @param et the entity type
+   * @param id the entity ID
+   */
+  urlFor(et: EntityType, id: string): string {
+    return this.service.getItemType(et, id).url;
+  }
+
+  /**
+   * Run an audit job.
+   * @param data the audit job data
+   */
+  runAudit(data: object): Promise {
+    return apiCall(this.service.runAudit(), data);
+  }
+
+  /**
+   * Cancel an audit job.
+   * @param jobId the job ID
+   */
+  cancelAudit(jobId: string): Promise<{ok: boolean}> {
+    return apiCall(this.service.cancel(jobId));
+  }
+}
diff --git a/modules/admin/app/assets/js/dmeditor/auditor-app.vue b/modules/admin/app/assets/js/dmeditor/auditor-app.vue
new file mode 100644
index 0000000000..b857be464c
--- /dev/null
+++ b/modules/admin/app/assets/js/dmeditor/auditor-app.vue
@@ -0,0 +1,185 @@
+
+
+
+
+
+  ×
+  {{ error }}
+
+  {{ $t(`dataModel.audit`)}}
+
+
+
+  {{ $t(`dataModel.audit.entityType`) }}
+
+  {{ $t(`contentTypes.${t}`) }}
+
+
+
+  {{ $t(`dataModel.audit.idPrefix`) }}
+
+
+
+
+  {{ $t(`dataModel.audit.mandatoryOnly`) }}
+
+
+
+  Cancel Audit
+
+
+  {{ $t(`dataModel.audit.runAudit`) }}
+
+
+
+
+
+
+  {{ $t(`dataModel.audit.fields.entity`) }}
+  {{ $t(`dataModel.audit.fields.mandatory`) }}
+  {{ $t(`dataModel.audit.fields.desirable`) }}
+
+
+  {{ item.id }}
+
+
+  Fields: {{ item.mandatory.length }}
+
+  {{ msg }}
+
+
+
+
+
+  Fields: {{ item.desirable.length }}
+
+  {{ msg }}
+
+
+
+
+
+
+
+  {{ doneMessage }}
+
+
+  {{ infoMessage }}
+
+
diff --git a/modules/admin/app/assets/js/dmeditor/websocket-handler.js b/modules/admin/app/assets/js/dmeditor/websocket-handler.js
new file mode 100644
index 0000000000..ad9dc7c281
--- /dev/null
+++ b/modules/admin/app/assets/js/dmeditor/websocket-handler.js
@@ -0,0 +1,22 @@
+addEventListener('message', ({data}) => {
+
+  let url = data.url;
+  let websocket = new WebSocket(url);
+  websocket.onopen = function () {
+    console.debug("Websocket open")
+  };
+  websocket.onerror = function (e) {
+    postMessage({error: e});
+  };
+  websocket.onmessage = function (e) {
+    let msg = JSON.parse(e.data);
+    let done = msg.indexOf(data.DONE) !== -1 || msg.indexOf(data.ERR) !== -1;
+    postMessage({msg: msg, done: done});
+    if (done) {
+      websocket.close();
+    }
+  };
+  websocket.onclose = function () {
+    console.debug("Websocket close")
+  }
+}, false);
diff --git a/modules/admin/app/controllers/admin/Home.scala b/modules/admin/app/controllers/admin/Home.scala
index 25a7e107e4..27c36726ac 100644
--- a/modules/admin/app/controllers/admin/Home.scala
+++ b/modules/admin/app/controllers/admin/Home.scala
@@ -23,7 +23,7 @@ case class Home @Inject()(
   appComponents: AppComponents
 ) extends AdminController with Search {
 
-  val searchEntities = List(
+  val searchEntities: Seq[EntityType.Value] = List(
     EntityType.DocumentaryUnit,
     EntityType.Repository,
     EntityType.HistoricalAgent
diff --git a/modules/admin/app/controllers/admin/Tasks.scala b/modules/admin/app/controllers/admin/Tasks.scala
index ac13c3e410..24a94d7a8a 100644
--- a/modules/admin/app/controllers/admin/Tasks.scala
+++ b/modules/admin/app/controllers/admin/Tasks.scala
@@ -1,10 +1,11 @@
 package controllers.admin
 
-import akka.actor.{Actor, ActorSystem, Props}
-import akka.stream.Materializer
+import actors.LongRunningJob
+import akka.actor.{Actor, ActorLogging, ActorNotFound, ActorSystem, Props}
+import akka.stream.{Materializer, OverflowStrategy}
 import controllers.AppComponents
 import controllers.base.AdminController
-import play.api.libs.json.JsValue
+import play.api.libs.json.{JsNull, JsValue, Json}
 import play.api.libs.streams.ActorFlow
 import play.api.mvc.WebSocket.MessageFlowTransformer
 import play.api.mvc._
@@ -26,9 +27,14 @@
     def props: Props = Props(new MessageHandler)
   }
 
-  class MessageHandler extends Actor {
+  class MessageHandler extends Actor with ActorLogging {
     private val logger = play.api.Logger(getClass)
 
+    override def preRestart(reason: Throwable, message: Option[Any]): Unit = {
+      super.preRestart(reason, message)
+      log.error(reason, "Unhandled exception for message: {}", message)
+    }
+
     override def receive: Receive = {
       case e => logger.warn(s"Unhandled message: $e")
     }
@@ -38,7 +44,8 @@
     MessageFlowTransformer.jsonMessageFlowTransformer[JsValue, String]
 
   def taskMonitorWS(jobId: String): WebSocket = AuthenticatedWebsocket(_.account.exists(_.staff)) { implicit request =>
-    ActorFlow.actorRef { out =>
+    logger.debug(s"Opening websocket for task: $jobId")
+    ActorFlow.actorRef(out => {
       system.actorSelection(s"user/$jobId").resolveOne(5.seconds).onComplete {
         case Success(ref) =>
           logger.info(s"Monitoring job: $jobId")
@@ -49,6 +56,19 @@
       }
       MessageHandler.props
+    }, 24, OverflowStrategy.dropTail)
+  }
+
+  def cancel(jobId: String): Action[AnyContent] = WithUserAction.async { implicit request =>
+    import scala.concurrent.duration._
+    mat.system.actorSelection("user/" + jobId).resolveOne(5.seconds).map { ref =>
+      logger.info(s"Cancelling job: $jobId")
+      ref ! LongRunningJob.Cancel
+      Ok(Json.obj("ok" -> true))
+    }.recover {
+      // Check if job is already cancelled or missing...
+      case _: ActorNotFound => Ok(Json.obj("ok" -> JsNull))
+      case e => InternalServerError(Json.obj("error" -> e.getMessage))
     }
   }
 }
diff --git a/modules/admin/app/controllers/datamodel/EntityTypeMetadata.scala b/modules/admin/app/controllers/datamodel/EntityTypeMetadata.scala
index 3813f00705..a95b97738f 100644
--- a/modules/admin/app/controllers/datamodel/EntityTypeMetadata.scala
+++ b/modules/admin/app/controllers/datamodel/EntityTypeMetadata.scala
@@ -13,7 +13,15 @@
   appComponents: AppComponents
 ) extends AdminController with ApiBodyParsers {
 
+  def index(): Action[AnyContent] = WithUserAction { implicit request =>
+    Redirect(controllers.datamodel.routes.EntityTypeMetadata.editor())
+  }
+
   def editor(): Action[AnyContent] = WithUserAction { implicit request =>
     Ok(views.html.admin.datamodel.editor())
   }
+
+  def auditor(): Action[AnyContent] = WithUserAction { implicit request =>
+    Ok(views.html.admin.datamodel.auditor())
+  }
 }
diff --git a/modules/admin/app/controllers/datamodel/EntityTypeMetadataApi.scala b/modules/admin/app/controllers/datamodel/EntityTypeMetadataApi.scala
index 5258927f29..dbf288323b 100644
--- a/modules/admin/app/controllers/datamodel/EntityTypeMetadataApi.scala
+++ b/modules/admin/app/controllers/datamodel/EntityTypeMetadataApi.scala
@@ -1,15 +1,19 @@
 package controllers.datamodel
 
+import actors.datamodel.AuditorManager
+import actors.datamodel.AuditorManager.{AuditTask, AuditorJob}
+import akka.actor.Props
 import akka.stream.Materializer
 import controllers.AppComponents
 import controllers.base.{AdminController, ApiBodyParsers}
 import models._
-import play.api.libs.json.{JsNull, Json}
+import play.api.libs.json.{Format, JsNull, Json}
 import play.api.libs.ws.WSClient
 import play.api.mvc._
 import services.data.DataHelpers
 import services.datamodel.EntityTypeMetadataService
 
+import java.util.UUID
 import javax.inject._
 import scala.concurrent.ExecutionContext
 
@@ -81,4 +85,19 @@
       Ok(Json.toJson(tpl.map(p => p._1.toString -> p._2)))
     }
   }
+
+  def runAudit(): Action[AuditTask] = WithUserAction.async(apiJson[AuditTask]) { implicit request =>
+    entityTypeMetaService.listEntityTypeFields(request.body.entityType).map { fields =>
+      logger.debug(s"Running audit for entity type: ${request.body}")
+      val jobId = UUID.randomUUID().toString
+      val job = AuditorJob(jobId = jobId, request.body)
+      mat.system.actorOf(Props(AuditorManager(job, searchEngine, searchResolver, fields)), jobId)
+
+      Ok(Json.obj(
+        "url" -> controllers.admin.routes.Tasks.taskMonitorWS(jobId).webSocketURL(conf.https),
+        "cancelUrl" -> controllers.admin.routes.Tasks.cancel(jobId).url,
+        "jobId" -> jobId
+      ))
+    }
+  }
 }
diff --git a/modules/admin/app/controllers/datasets/ImportDatasets.scala b/modules/admin/app/controllers/datasets/ImportDatasets.scala
index 36d80f0e3d..74b30a2a11 100644
--- a/modules/admin/app/controllers/datasets/ImportDatasets.scala
+++ b/modules/admin/app/controllers/datasets/ImportDatasets.scala
@@ -38,6 +38,7 @@
     Ok(
       JavaScriptReverseRouter("datasetApi")(
         controllers.admin.routes.javascript.Tasks.taskMonitorWS,
+        controllers.admin.routes.javascript.Tasks.cancel,
         controllers.datasets.routes.javascript.ImportDatasets.manager,
         controllers.datasets.routes.javascript.ImportDatasets.list,
         controllers.datasets.routes.javascript.ImportDatasets.listAll,
@@ -48,7 +49,6 @@
         controllers.datasets.routes.javascript.ImportDatasets.batch,
         controllers.datasets.routes.javascript.ImportDatasets.fileCount,
         controllers.datasets.routes.javascript.ImportDatasets.errors,
-        controllers.datasets.routes.javascript.LongRunningJobs.cancel,
         controllers.datasets.routes.javascript.ImportFiles.listFiles,
         controllers.datasets.routes.javascript.ImportFiles.info,
         controllers.datasets.routes.javascript.ImportFiles.validateFiles,
diff --git a/modules/admin/app/controllers/datasets/LongRunningJobs.scala b/modules/admin/app/controllers/datasets/LongRunningJobs.scala
deleted file mode 100644
index 28d4e5865c..0000000000
--- a/modules/admin/app/controllers/datasets/LongRunningJobs.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-package controllers.datasets
-
-import actors.LongRunningJob
-import akka.actor.ActorNotFound
-import akka.stream.Materializer
-import controllers.AppComponents
-import controllers.base.AdminController
-import controllers.generic.Update
-import models._
-import play.api.libs.json.{JsNull, Json}
-import play.api.mvc._
-
-import javax.inject.{Inject, Singleton}
-
-@Singleton
-case class LongRunningJobs @Inject()(
-  controllerComponents: ControllerComponents,
-  appComponents: AppComponents,
-)(implicit mat: Materializer) extends AdminController with Update[Repository] {
-
-  def cancel(id: String, jobId: String): Action[AnyContent] = EditAction(id).async { implicit request =>
-    import scala.concurrent.duration._
-    mat.system.actorSelection("user/" + jobId).resolveOne(5.seconds).map { ref =>
-      logger.info(s"Cancelling job: $jobId")
-      ref ! LongRunningJob.Cancel
-      Ok(Json.obj("ok" -> true))
-    }.recover {
-      // Check if job is already cancelled or missing...
-      case _: ActorNotFound => Ok(Json.obj("ok" -> JsNull));
-      case e => InternalServerError(Json.obj("error" -> e.getMessage))
-    }
-  }
-}
diff --git a/modules/admin/app/utils/WebsocketConstants.scala b/modules/admin/app/utils/WebsocketConstants.scala
index a5b83aa27c..4d65a5aa75 100644
--- a/modules/admin/app/utils/WebsocketConstants.scala
+++ b/modules/admin/app/utils/WebsocketConstants.scala
@@ -6,5 +6,6 @@ object WebsocketConstants {
    * as the search index update job.
    */
   val DONE_MESSAGE = "Done"
+  val INFO_MESSAGE = "Info"
   val ERR_MESSAGE = "Error"
 }
diff --git a/modules/admin/app/views/admin/Helpers.scala b/modules/admin/app/views/admin/Helpers.scala
index ecf53b9a28..67b67a672e 100644
--- a/modules/admin/app/views/admin/Helpers.scala
+++ b/modules/admin/app/views/admin/Helpers.scala
@@ -34,6 +34,8 @@
     ("ingest.datasets", controllers.datasets.routes.ImportDatasets.dashboard().url),
     ("s4", "-"),
     ("dataModel", controllers.datamodel.routes.EntityTypeMetadata.editor().url),
+    ("dataModel.audit", controllers.datamodel.routes.EntityTypeMetadata.auditor().url),
+    ("s5", "-"),
     ("search.index.update", controllers.admin.routes.Indexing.updateIndex().url),
     ("admin.utils.findReplace", controllers.tools.routes.Tools.findReplace().url),
     ("admin.utils.regenerateIds", controllers.tools.routes.Tools.regenerateIds().url),
diff --git a/modules/admin/app/views/admin/datamodel/auditor.scala.html b/modules/admin/app/views/admin/datamodel/auditor.scala.html
new file mode 100644
index 0000000000..82e26c98e5
--- /dev/null
+++ b/modules/admin/app/views/admin/datamodel/auditor.scala.html
@@ -0,0 +1,55 @@
+@()(implicit userOpt: Option[UserProfile], req: RequestHeader, conf: AppConfig, messages: Messages, md: MarkdownRenderer, prefs: SessionPrefs, flash: Flash)
+
+@styles = {
+
+}
+
+@breadcrumbRoot = {
+  @Messages("dataModel.audit")
+}
+
+@views.html.admin.layout.adminSinglePageAppLayout(Messages("dataModel.audit"), styles = styles,
+  breadcrumbs = views.html.admin.common.breadcrumbs(List(), breadcrumbRoot)) {
+} {
+  @helper.javascriptRouter("dataModelApi")(
+    controllers.admin.routes.javascript.Data.getItemType,
+    controllers.admin.routes.javascript.Tasks.cancel,
+    controllers.datamodel.routes.javascript.EntityTypeMetadataApi.templates,
+    controllers.datamodel.routes.javascript.EntityTypeMetadataApi.list,
+    controllers.datamodel.routes.javascript.EntityTypeMetadataApi.i18n,
+    controllers.datamodel.routes.javascript.EntityTypeMetadataApi.runAudit
+  )
+
+
+
+
+}
diff --git a/modules/admin/app/views/admin/datasets/dashboard.scala.html b/modules/admin/app/views/admin/datasets/dashboard.scala.html
index f7c40da624..d5b6fe6bc7 100644
--- a/modules/admin/app/views/admin/datasets/dashboard.scala.html
+++ b/modules/admin/app/views/admin/datasets/dashboard.scala.html
@@ -17,7 +17,7 @@
     Vue.createApp({
         render: function() {
             return Vue.h(DashboardApp.default, {
-                service: datasetApi.controllers.datasets,
+                service: datasetApi.controllers,
                 config: {
                     title: "@Messages("ingest.datasets")"
                 }
diff --git a/modules/admin/app/views/admin/datasets/manager.scala.html b/modules/admin/app/views/admin/datasets/manager.scala.html
index ea413415b5..aaa9320332 100644
--- a/modules/admin/app/views/admin/datasets/manager.scala.html
+++ b/modules/admin/app/views/admin/datasets/manager.scala.html
@@ -16,7 +16,7 @@
     Vue.createApp({
         render: function() {
             return Vue.h(App.default, {
-                service: datasetApi.controllers.datasets,
+                service: datasetApi.controllers,
                 config: {
                     repoId: "@item.id",
                     versioned: @isVersioned,
@@ -24,7 +24,7 @@
                     output: "@FileStage.Output",
                     config: "@FileStage.Config",
                     admin: @userOpt.forall(_.isAdmin),
-                    previewLoader: "@controllers.admin.routes.AdminAssets.versioned("js/datasets/worker.js")",
+                    websocketHandler: "@controllers.admin.routes.AdminAssets.versioned("js/datasets/worker.js")",
                     monitorUrl: jobId => datasetApi.controllers.admin.Tasks.taskMonitorWS(jobId)
                         .absoluteURL(location.protocol === "https:")
                         .replace(/^http/, "ws"),
diff --git a/modules/admin/app/views/admin/index.scala.html b/modules/admin/app/views/admin/index.scala.html
index 8ff3cf03a5..6ec65f3d73 100644
--- a/modules/admin/app/views/admin/index.scala.html
+++ b/modules/admin/app/views/admin/index.scala.html
@@ -54,5 +54,11 @@
             @Messages("admin.recentActivity")
             @Messages("cypherQuery.list")
         }
+        @views.html.admin.common.sidebarAction() {
+
+
+            @Messages("dataModel.audit")
+
+        }
     }
 }
diff --git a/modules/admin/conf/admin.routes b/modules/admin/conf/admin.routes
index 883a233748..0d202054d1 100644
--- a/modules/admin/conf/admin.routes
+++ b/modules/admin/conf/admin.routes
@@ -20,6 +20,7 @@ GET     /ingest              @controllers.admin.Ingest.ingest
 
 # Tasks
 GET     /tasks/monitor       @controllers.admin.Tasks.taskMonitorWS(jobId: String)
+DELETE  /tasks/cancel/:jobId @controllers.admin.Tasks.cancel(jobId: String)
 
 # Search testing
 GET     /search              @controllers.admin.AdminSearch.search(params: services.search.SearchParams ?= services.search.SearchParams.empty, paging: utils.PageParams ?= utils.PageParams.empty)
diff --git a/modules/admin/conf/datamodel.routes b/modules/admin/conf/datamodel.routes
index 1c2bdb68b0..158825aa44 100644
--- a/modules/admin/conf/datamodel.routes
+++ b/modules/admin/conf/datamodel.routes
@@ -1,10 +1,14 @@
 # Routes for datamodel API
 
-GET     /                        @controllers.datamodel.EntityTypeMetadata.editor()
+GET     /                        @controllers.datamodel.EntityTypeMetadata.index()
+GET     /editor                  @controllers.datamodel.EntityTypeMetadata.editor()
+GET     /audit                   @controllers.datamodel.EntityTypeMetadata.auditor()
+
 GET     /templates               @controllers.datamodel.EntityTypeMetadataApi.templates()
 GET     /i18n                    @controllers.datamodel.EntityTypeMetadataApi.i18n()
+POST    /run-audit               @controllers.datamodel.EntityTypeMetadataApi.runAudit()
 GET     /entities                @controllers.datamodel.EntityTypeMetadataApi.list()
-GET     /entities/:entityType    @controllers.datamodel.EntityTypeMetadataApi.get(entityType :models.EntityType.Value)
+GET     /entities/:entityType    @controllers.datamodel.EntityTypeMetadataApi.get(entityType: models.EntityType.Value)
 POST    /entities/:entityType    @controllers.datamodel.EntityTypeMetadataApi.save(entityType: models.EntityType.Value)
 DELETE  /entities/:entityType    @controllers.datamodel.EntityTypeMetadataApi.delete(entityType: models.EntityType.Value)
 GET     /fields                  @controllers.datamodel.EntityTypeMetadataApi.listFields(entityType: Option[models.EntityType.Value] ?= None)
diff --git a/modules/admin/conf/datasets.routes b/modules/admin/conf/datasets.routes
index a7b6cd34db..bc69a1be83 100644
--- a/modules/admin/conf/datasets.routes
+++ b/modules/admin/conf/datasets.routes
@@ -4,9 +4,6 @@ GET     /        @controllers.datasets.
 GET     /routes                      @controllers.datasets.ImportDatasets.jsRoutes()
 GET     /:id                         @controllers.datasets.ImportDatasets.manager(id: String, ds: Option[String] ?= None)
 
-# API/JSON routes
-DELETE  /api/:id/cancel              @controllers.datasets.LongRunningJobs.cancel(id: String, jobId: String)
-
 GET     /api/list                    @controllers.datasets.ImportDatasets.listAll()
 POST    /api/:id/toggle-versioning   @controllers.datasets.ImportDatasets.toggleVersioning(id: String, enabled: Boolean ?= true)
 GET     /api/:id/datasets-stats      @controllers.datasets.ImportDatasets.stats(id: String)
diff --git a/modules/admin/conf/messages b/modules/admin/conf/messages
index bb1e4d2ebc..9fed1baecc 100644
--- a/modules/admin/conf/messages
+++ b/modules/admin/conf/messages
@@ -1578,3 +1578,18 @@ cleanup.done=Cleanup complete
 dataModel.advisories={0,choice,0#No advisories|1#1 Metadata quality advisory|1<{0,number,integer} Metadata quality advisories}
 dataModel.missingMandatoryFields={0,choice,0#No missing mandatory fields|1#1 missing mandatory field|1<{0,number,integer} missing mandatory fields}
 dataModel.missingDesirableFields={0,choice,0#No missing desirable fields|1#1 missing desirable field|1<{0,number,integer} missing desirable fields}
+dataModel.audit=Metadata Audit
+dataModel.audit.completed={0}: Metadata audit completed, {1,choice,0#0 items checked|1#1 item checked|1<{1,number,integer} items checked}, {2} flagged in {3,number,integer} seconds
+dataModel.audit.error={0}: Metadata audit error: {1}
+dataModel.audit.cancelled={0}: Metadata audit cancelled
+dataModel.audit.checked={0}: Checked {1,choice,0#0 items|1#1 item|1<{1,number,integer} items}
+dataModel.audit.entityType=Entity Type
+dataModel.audit.entityType.description=The type of entity to audit
+dataModel.audit.mandatoryOnly=Check Mandatory Fields Only
+dataModel.audit.mandatoryOnly.description=Check only mandatory fields, ignoring desirable fields
+dataModel.audit.idPrefix=ID Prefix
+dataModel.audit.idPrefix.description=Narrow the scope of the audit scan by specifying an ID prefix, e.g. 'gb-00' for UK items...
+dataModel.audit.runAudit=Run Audit
+dataModel.audit.fields.entity=Entity
+dataModel.audit.fields.mandatory=Mandatory
+dataModel.audit.fields.desirable=Desirable
diff --git a/modules/core/src/main/scala/models/Entity.scala b/modules/core/src/main/scala/models/Entity.scala
index 8de3994a4b..8758b81c27 100644
--- a/modules/core/src/main/scala/models/Entity.scala
+++ b/modules/core/src/main/scala/models/Entity.scala
@@ -39,6 +39,10 @@ object Entity {
    * Format for a generic entity.
    */
   val entityFormat: Format[Entity] = Format(entityReads, entityWrites)
+
+  implicit object EntityReadable extends Readable[Entity] {
+    val _reads: Reads[Entity] = entityReads
+  }
 }
 
 case class Entity(
diff --git a/modules/core/src/main/scala/models/FieldMetadataSet.scala b/modules/core/src/main/scala/models/FieldMetadataSet.scala
index 14d0831ff4..ec49db748b 100644
--- a/modules/core/src/main/scala/models/FieldMetadataSet.scala
+++ b/modules/core/src/main/scala/models/FieldMetadataSet.scala
@@ -5,7 +5,9 @@
 import play.api.libs.json._
 
 import scala.collection.immutable.ListMap
 
-sealed trait ValidationError
+sealed trait ValidationError {
+  def id: String
+}
 
 case class MissingMandatoryField(id: String) extends ValidationError
 case class MissingDesirableField(id: String) extends ValidationError
@@ -27,7 +29,7 @@
 
   def noCategory: Seq[FieldMetadata] = fieldMetadata.values.filter(_.category.isEmpty).toSeq
 
-  def validate[T: Writable](data: T): Seq[ValidationError] = {
+  def validate(entity: Entity): Seq[ValidationError] = {
     def flattenEntity(e: Entity): Seq[String] = {
       // gather all populated keys in the entity and its child relationships. This
      // might lead to some missed attributes, because keys may be shared. But it's
@@ -36,18 +38,21 @@
       e.relationships.values.toSeq.flatten.flatMap { r => flattenEntity(r) }
     }
 
+    val fms = fieldMetadata.values.filter(_.entityType == entity.isA).toSeq
+    val allKeys = flattenEntity(entity)
+    fms.filter(_.usage.contains(FieldMetadata.Usage.Mandatory)).flatMap { fm =>
+      if (!allKeys.contains(fm.id)) Some(MissingMandatoryField(fm.id))
+      else None
+    } ++ fms.filter(_.usage.contains(FieldMetadata.Usage.Desirable)).flatMap { fm =>
+      if (!allKeys.contains(fm.id)) Some(MissingDesirableField(fm.id))
+      else None
+    }
+  }
+
+  def validate[T: Writable](data: T): Seq[ValidationError] = {
     val json = Json.toJson(data)(implicitly[Writable[T]]._format)
     json.validate[Entity] match {
-      case JsSuccess(entity, _) =>
-        val fms = fieldMetadata.values.filter(_.entityType == entity.isA).toSeq
-        val allKeys = flattenEntity(entity)
-        fms.filter(_.usage.contains(FieldMetadata.Usage.Mandatory)).flatMap { fm =>
-          if (!allKeys.contains(fm.id)) Some(MissingMandatoryField(fm.id))
-          else None
-        } ++ fms.filter(_.usage.contains(FieldMetadata.Usage.Desirable)).flatMap { fm =>
-          if (!allKeys.contains(fm.id)) Some(MissingDesirableField(fm.id))
-          else None
-        }
+      case JsSuccess(entity, _) => validate(entity)
       case JsError(errors) =>
         logger.error(s"FieldMetadataSet.validate: failed to parse entity data: ${Json.prettyPrint(json)}: $errors")
         Seq.empty
diff --git a/modules/core/src/main/scala/services/search/SearchConstants.scala b/modules/core/src/main/scala/services/search/SearchConstants.scala
index 133f2ccb2d..bb34180e56 100644
--- a/modules/core/src/main/scala/services/search/SearchConstants.scala
+++ b/modules/core/src/main/scala/services/search/SearchConstants.scala
@@ -84,7 +84,7 @@ object SearchConstants {
   final val NAME_NGRAM = "name_ngram"
 
   /**
-   * The field referring to the "item"'s id, rather than
+   * The field referring to the item's id, rather than
    * that of the description.
    */
   final val ITEM_ID = "itemId"
diff --git a/test/actors/datamodel/AuditorSpec.scala b/test/actors/datamodel/AuditorSpec.scala
new file mode 100644
index 0000000000..b0818f8795
--- /dev/null
+++ b/test/actors/datamodel/AuditorSpec.scala
@@ -0,0 +1,67 @@
+package actors.datamodel
+
+import actors.LongRunningJob.Cancel
+import actors.datamodel.Auditor.RunAudit
+import actors.datamodel.AuditorManager.{AuditTask, AuditorJob}
+import akka.actor.Props
+import helpers.IntegrationTestRunner
+import mockdata.adminUserProfile
+import models.{EntityType, FieldMetadata, FieldMetadataSet, UserProfile}
+import play.api.Application
+import services.search.{SearchEngine, SearchItemResolver}
+
+import scala.collection.immutable.ListMap
+
+
+/**
+  * This spec runs an audit against all DocumentaryUnits for the
+  * mandatory field "locationOfOriginals" and tests that the auditor
+  * actor gives the correct responses.
+  */
+class AuditorSpec extends IntegrationTestRunner {
+
+  private def searchEngine(implicit app: Application) = app.injector.instanceOf[SearchEngine]
+  private def resolver(implicit app: Application) = app.injector.instanceOf[SearchItemResolver]
+
+  private implicit val userOpt: Option[UserProfile] = Some(adminUserProfile)
+
+  private val fieldMetadataSet: FieldMetadataSet = FieldMetadataSet(
+    fieldMetadata = ListMap(
+      "locationOfOriginals" -> FieldMetadata(
+        entityType = EntityType.DocumentaryUnit,
+        "locationOfOriginals",
+        "Location of Originals",
+        usage = Some(FieldMetadata.Usage.Mandatory),
+      )
+    )
+  )
+
+  private def job(implicit app: Application): AuditorJob = AuditorJob(
+    "test-job-id",
+    AuditTask(
+      EntityType.DocumentaryUnit,
+      None,
+      mandatoryOnly = true
+    )
+  )
+
+  "Auditor runner" should {
+
+    "send correct messages when auditing an entity type" in new ITestAppWithAkka {
+      val runner = system.actorOf(Props(Auditor(searchEngine, resolver, fieldMetadataSet, 5, 10)))
+
+      runner ! RunAudit(job, None)
+      expectMsg(Auditor.Checked(0))
+      expectMsgClass(classOf[Auditor.CheckBatch])
+      expectMsgClass(classOf[Auditor.Completed])
+    }
+
+    "allow cancellation" in new ITestAppWithAkka {
+      val runner = system.actorOf(Props(Auditor(searchEngine, resolver, fieldMetadataSet, 5, 10)))
+
+      runner ! RunAudit(job, None)
+      runner ! Cancel
+      expectMsgClass(classOf[Auditor.Cancelled])
+    }
+  }
+}
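Reviewer note (not part of the patch): the Auditor actor pages through search results by re-sending itself `RunAudit` with the next page of paging parameters, carrying progress in the arguments of its `running` receive rather than in mutable state. A minimal, self-contained sketch of that idiom follows; all names here (`Pager`, `FetchPage`, `Done`, `fetch`) are invented for illustration and are not part of the codebase.

```scala
import akka.actor.{Actor, ActorRef, Props}

object Pager {
  case class FetchPage(offset: Int)
  case class Done(total: Int)
  def props(fetch: Int => Seq[String], batchSize: Int, replyTo: ActorRef): Props =
    Props(new Pager(fetch, batchSize, replyTo))
}

class Pager(fetch: Int => Seq[String], batchSize: Int, replyTo: ActorRef) extends Actor {
  import Pager._

  override def receive: Receive = running(seen = 0)

  private def running(seen: Int): Receive = {
    case FetchPage(offset) =>
      val items = fetch(offset)
      // As in the Auditor's Batch case: record progress by swapping in a new
      // receive via context.become, then either queue the next page with a
      // self-message or report completion and stop.
      if (items.size == batchSize) {
        context.become(running(seen + items.size))
        self ! FetchPage(offset + batchSize)
      } else {
        replyTo ! Done(seen + items.size)
        context.stop(self)
      }
  }
}
```

The real Auditor additionally caps the run on `maxFlagged` and sends interim `Checked` progress messages to its subscriber.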
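Reviewer note (not part of the patch): with the new `FieldMetadataSet.validate(entity: Entity)` overload, callers can split the returned errors by severity exactly as AuditorManager does when building an `ItemResult`. A sketch using the types defined in FieldMetadataSet.scala, assuming an `entity: Entity` and a `fieldMetadataSet: FieldMetadataSet` in scope:

```scala
import models._

val errors: Seq[ValidationError] = fieldMetadataSet.validate(entity)
// Partition by error subtype; the `id` field names the missing field
val mandatory: Seq[String] = errors.collect { case e: MissingMandatoryField => e.id }
val desirable: Seq[String] = errors.collect { case e: MissingDesirableField => e.id }
```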
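Reviewer note (not part of the patch): websocket subscribers receive each `CheckBatch` as a JSON array serialised with the `Json.format`-derived `Format[ItemResult]` defined in AuditorManager, so the keys follow the constructor order (`id`, `mandatory`, `desirable`). A sketch of the payload shape; the item and field ids below are illustrative values, not real data:

```scala
import actors.datamodel.AuditorManager.ItemResult
import play.api.libs.json.Json

val payload: String = Json.stringify(Json.toJson(Seq(
  // "unit-1" and "locationOfOriginals" are example ids only
  ItemResult("unit-1", mandatory = Seq("locationOfOriginals"), desirable = Seq.empty)
)))
// payload == """[{"id":"unit-1","mandatory":["locationOfOriginals"],"desirable":[]}]"""
```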