diff --git a/packages/wikidata-experimental-adapter/README.md b/packages/wikidata-experimental-adapter/README.md index 603b979ca..815dec86f 100644 --- a/packages/wikidata-experimental-adapter/README.md +++ b/packages/wikidata-experimental-adapter/README.md @@ -1,39 +1,8 @@ -# Wikidata experimental adapter +# Wikidata experimental adapter v2 -A first iteration of a Wikidata integration to the Dataspecer tool. -The client queries the Wikidata SPARQL endpoint. +A second iteration of a Wikidata integration into the Dataspecer tool. +The client queries the Wikidata backend with an extracted ontology. -## Comments - -- The root search - - It queries entire Wikidata, that means the root can be any entity from the Wikidata, including instances and properties. - - It should handle only English language as of now. -- Hierarchy - - The hierarchy is made up of following `subclass of` properties to the parents. - - Using the SPARQL it can follow the `subclass of` property in reverse order and get children. -- Surroundings - - For each part of the surroundings (parents, children and associations with endpoints) stands a separate SPARQL query. - - Parents and children are the same as in hierarchy but only in the depth 1. - - Associations: - - Associations are created from `subject type` and `value type` constraints on properties. - - To find properties of a class it queries the SPARQL for properties that the class can be `subject of` or `value of`. - - If the class is `subject of` a property, then associations are created so that the `value types` are the ends of outgoing edge. In reverse if the class is `value of` a property, then the incoming edges are with endpoints of the `subject types` of the property. - - If the class is a `subject of` property but the property has a literal type, then it is an attribute. - -## What can it do?
- -- search - - search based on string - - search based on iri -- full hierarchy - - children - - parents -- surroundings - - parents in height 1 - - children in depth 1 - - attributes (wikidata properties that do not point to items based on subject contraint) - - associations - ## How to start it up for development? 1. `> git clone repository` diff --git a/packages/wikidata-experimental-adapter/src/connector/entities/constraints.ts b/packages/wikidata-experimental-adapter/src/connector/entities/constraints.ts new file mode 100644 index 000000000..4f849c2c5 --- /dev/null +++ b/packages/wikidata-experimental-adapter/src/connector/entities/constraints.ts @@ -0,0 +1,44 @@ +import type { EntityId, EntityIdsList } from './wd-entity'; + +export enum PropertyScopeValue { + AS_MAIN = 0, + AS_QUALIFIER = 1, + AS_REFERENCE = 2, +} + +export enum AllowedEntityTypesValue { + ITEM = 0, + PROPERTY = 1, + LEXEME = 2, + FORM = 3, + SENSE = 4, + MEDIA_INFO = 5, +} + +export type StatementAllowanceMap = Record; + +export interface SubjectValueTypeContraint { + readonly subclassOf: EntityIdsList; + readonly instanceOf: EntityIdsList; + readonly subclassOfInstanceOf: EntityIdsList; +} +export interface GeneralConstraints { + readonly propertyScope: readonly PropertyScopeValue[]; + readonly allowedEntityTypes: readonly AllowedEntityTypesValue[]; + readonly allowedQualifiers: EntityIdsList; + readonly requiredQualifiers: EntityIdsList; + readonly conflictsWith: StatementAllowanceMap; + readonly itemRequiresStatement: StatementAllowanceMap; + readonly subjectType: SubjectValueTypeContraint; +} + +export interface ItemTypeConstraints { + readonly valueType: SubjectValueTypeContraint; + readonly valueRequiresStatement: StatementAllowanceMap; + readonly isSymmetric: boolean; + readonly oneOf: EntityIdsList; + readonly noneOf: EntityIdsList; + readonly inverse: null | EntityId; +} + +export type EmptyTypeConstraint = null; \ No newline at end of file diff --git 
a/packages/wikidata-experimental-adapter/src/connector/entities/wd-class.ts b/packages/wikidata-experimental-adapter/src/connector/entities/wd-class.ts new file mode 100644 index 000000000..12b1aa52e --- /dev/null +++ b/packages/wikidata-experimental-adapter/src/connector/entities/wd-class.ts @@ -0,0 +1,12 @@ +import { IWdEntity, EntityIdsList, ExternalOntologyMapping } from './wd-entity'; + +export const ROOT_CLASS_ID = 35120; + +export interface IWdClass extends IWdEntity { + readonly subclassOf: EntityIdsList; + readonly children?: EntityIdsList; + readonly propertiesForThisType: EntityIdsList; + readonly equivalentExternalOntologyClasses: ExternalOntologyMapping; + readonly subjectOfProperty: EntityIdsList; + readonly valueOfProperty: EntityIdsList; +} \ No newline at end of file diff --git a/packages/wikidata-experimental-adapter/src/connector/entities/wd-entity.ts b/packages/wikidata-experimental-adapter/src/connector/entities/wd-entity.ts new file mode 100644 index 000000000..96b0eece1 --- /dev/null +++ b/packages/wikidata-experimental-adapter/src/connector/entities/wd-entity.ts @@ -0,0 +1,19 @@ +export type LanguageMap = Record; + +export type EntityId = number; +export type EntityIdsList = readonly EntityId[]; + +export type ExternalEntityId = string; +export type ExternalOntologyMapping = readonly ExternalEntityId[]; + +export enum EntityTypes { + CLASS, + PROPERTY, +} + +export interface IWdEntity { + readonly id: EntityId; + readonly labels: LanguageMap; + readonly descriptions: LanguageMap; + readonly instanceOf: EntityIdsList; +} \ No newline at end of file diff --git a/packages/wikidata-experimental-adapter/src/connector/entities/wd-property.ts b/packages/wikidata-experimental-adapter/src/connector/entities/wd-property.ts new file mode 100644 index 000000000..c4c9a7bcb --- /dev/null +++ b/packages/wikidata-experimental-adapter/src/connector/entities/wd-property.ts @@ -0,0 +1,45 @@ +import type { IWdEntity, EntityIdsList, ExternalOntologyMapping } 
from './wd-entity'; +import { type EmptyTypeConstraint, GeneralConstraints, type ItemTypeConstraints } from './constraints'; + +export enum UnderlyingType { + ENTITY = 0, + STRING = 1, + TIME = 2, + QUANTITY = 3, + GLOBE_COORDINATE = 4, +} + +export enum Datatype { + ITEM = 0, + PROPERTY = 1, + LEXEME = 2, + SENSE = 3, + FORM = 4, + MONOLINGUAL_TEXT = 5, + STRING = 6, + EXTERNAL_IDENTIFIER = 7, + URL = 8, + COMMONS_MEDIA_FILE = 9, + GEOGRAPHIC_SHAPE = 10, + TABULAR_DATA = 11, + MATHEMATICAL_EXPRESSION = 12, + MUSICAL_NOTATION = 13, + QUANTITY = 14, + POINT_IN_TIME = 15, + GEOGRAPHIC_COORDINATES = 16, +} + +export interface IWdProperty extends IWdEntity { + readonly datatype: Datatype; + readonly underlyingType: UnderlyingType; + readonly subpropertyOf: EntityIdsList; + readonly relatedProperty: EntityIdsList; + readonly equivalentExternalOntologyProperties: ExternalOntologyMapping; + readonly generalConstraints: GeneralConstraints; + + readonly itemConstraints?: ItemTypeConstraints; + readonly stringConstraints?: EmptyTypeConstraint; + readonly quantityConstraints?: EmptyTypeConstraint; + readonly timeConstraints?: EmptyTypeConstraint; + readonly coordinatesConstraints?: EmptyTypeConstraint; +} diff --git a/packages/wikidata-experimental-adapter/src/connector/response.ts b/packages/wikidata-experimental-adapter/src/connector/response.ts new file mode 100644 index 000000000..4e1916794 --- /dev/null +++ b/packages/wikidata-experimental-adapter/src/connector/response.ts @@ -0,0 +1,57 @@ +import { IWdClass } from "./entities/wd-class"; +import { IWdProperty } from "./entities/wd-property"; + +// Error response + +export interface IErrorResponse { + statusCode: number, + error: string, + message: string +} + +// Search api + +export interface ISearchResults { + classes: IWdClass[] +} + +export interface ISearchResponse { + results: ISearchResults; +} + +// Get class api + +export interface IGetClassResults { + classes: IWdClass[] +} + +export interface IGetClassResponse 
/**
 * Thin HTTP client for the Wikidata ontology backend.
 *
 * Every public method issues a single request through the injected
 * `httpFetch`, parses the body as JSON and resolves to that payload.
 * It resolves to `undefined` instead when the payload is not a JSON object
 * or when it has the shape of an error response (see `isIErrorResponse`).
 */
export class WdConnector {
    private readonly BASE_URL: string;

    // URL builders for the individual endpoints. They read `BASE_URL` lazily,
    // i.e. only when a request is actually made.
    private readonly API_ENDPOINTS = {
        // The query is a single query-string value, so it must be escaped with
        // `encodeURIComponent`; `encodeURI` leaves `&`, `#`, `=`, `+` and `?` intact.
        search: (query: string) => this.BASE_URL + `/search?query=${encodeURIComponent(query)}`,
        getClass: (id: EntityId) => this.BASE_URL + `/classes/${id}`,
        hierarchy: (id: EntityId, part: 'full' | 'parents' | 'children') => this.BASE_URL + `/classes/${id}/hierarchy?part=${part}`,
        surroundings: (id: EntityId) => this.BASE_URL + `/classes/${id}/surroundings`,
    };

    private readonly httpFetch: HttpFetch;

    /**
     * @param httpFetch Fetch implementation used for every request.
     * @param baseUrl Root URL of the backend API; defaults to the local
     *                development address that was previously hard-coded.
     */
    constructor(httpFetch: HttpFetch, baseUrl: string = "http://localhost:3042/api/v1") {
        this.httpFetch = httpFetch;
        this.BASE_URL = baseUrl;
    }

    /**
     * Tells whether a parsed payload looks like an error response, i.e. it
     * carries all of the `statusCode`, `message` and `error` keys.
     */
    private isIErrorResponse(response: object): boolean {
        return 'statusCode' in response &&
               'message' in response &&
               'error' in response;
    }

    /**
     * Shared request path of all public methods: fetch `url`, parse the JSON
     * body and filter out anything that is not a successful object payload.
     */
    private async fetchResults<T extends object>(url: string): Promise<T | undefined> {
        const response = await this.httpFetch(url);
        const payload: unknown = await response.json();
        // The `in` operator throws on null and on primitives, so rule them out first.
        if (typeof payload !== 'object' || payload === null) {
            return undefined;
        }
        return this.isIErrorResponse(payload) ? undefined : payload as T;
    }

    /**
     * Searches classes by a free-text query.
     * @returns The search response, or `undefined` on an error payload.
     */
    public async search(query: string): Promise<ISearchResponse | undefined> {
        return this.fetchResults<ISearchResponse>(this.API_ENDPOINTS.search(query));
    }

    /**
     * Loads a single class by its numeric id.
     * @returns The class response, or `undefined` on an error payload.
     */
    public async getClass(id: EntityId): Promise<IGetClassResponse | undefined> {
        return this.fetchResults<IGetClassResponse>(this.API_ENDPOINTS.getClass(id));
    }

    /**
     * Loads the hierarchy around a class.
     * @param part Which part of the hierarchy to request; defaults to
     *             `'parents'`, which is what this method always requested before.
     * @returns The hierarchy response, or `undefined` on an error payload.
     */
    public async hierarchy(
        id: EntityId,
        part: 'full' | 'parents' | 'children' = 'parents',
    ): Promise<IHierarchyResponse | undefined> {
        return this.fetchResults<IHierarchyResponse>(this.API_ENDPOINTS.hierarchy(id, part));
    }

    /**
     * Loads the surroundings of a class.
     * @returns The surroundings response, or `undefined` on an error payload.
     */
    public async surroundings(id: EntityId): Promise<ISurroundingsResponse | undefined> {
        return this.fetchResults<ISurroundingsResponse>(this.API_ENDPOINTS.surroundings(id));
    }
}
this.wikidataJsonObject = wikiwikidataJsonObject; - } - - getIri(): string { - return WIKIDATA_ENTITY_PREFIX + this.wikidataJsonObject['id']; - } - - getEntityType(): string { - return this.wikidataJsonObject['type']; - } - - getParents(): string[] { - let parentsIris: string[] = []; - if ('claims' in this.wikidataJsonObject) { - const claims = this.wikidataJsonObject['claims'] as WikidataClaims; - const subclassOfProperty = this.getLastPartOfIri(WIKIDATA.subclassOf); - if (subclassOfProperty in claims) { - const parentsSnaks = claims[subclassOfProperty]; - parentsSnaks.forEach((snak) => { - const id = snak.mainsnak.datavalue?.value.id; - if (id != null) - parentsIris.push(WIKIDATA_ENTITY_PREFIX + id); - }); - } - } - return parentsIris; - } - - getDescriptions(): LanguageString { - if ('descriptions' in this.wikidataJsonObject) { - return this.convertWikidataLanguageObjectToLanguageStrings(this.wikidataJsonObject['descriptions'] as WikidataLanguageObject); - } - return {}; - } - - getLabels(): LanguageString { - if ('labels' in this.wikidataJsonObject) { - return this.convertWikidataLanguageObjectToLanguageStrings(this.wikidataJsonObject['labels'] as WikidataLanguageObject); - } - return {}; - } - - private convertWikidataLanguageObjectToLanguageStrings(wlo: WikidataLanguageObject): LanguageString { - return Object.fromEntries(Object.entries(wlo).map((o) => [o[1].language, o[1].value])); - } - - private getLastPartOfIri(iri: string): string { - return iri.split("/").pop(); - } - -} - -export async function loadWikidataEntityFromPhpWrapToResource(entity: WikidataItemPhpWrap, idProvider: IriProvider, resource: PimResource): Promise { - resource.pimInterpretation = entity.getIri(); - resource.iri = idProvider.cimToPim(entity.getIri()); - resource.pimHumanDescription = entity.getDescriptions(); - resource.pimHumanLabel = entity.getLabels(); -} - \ No newline at end of file diff --git 
a/packages/wikidata-experimental-adapter/src/entity-adapters/php-api-wikidata-item-adapter.ts b/packages/wikidata-experimental-adapter/src/entity-adapters/php-api-wikidata-item-adapter.ts deleted file mode 100644 index 66f054863..000000000 --- a/packages/wikidata-experimental-adapter/src/entity-adapters/php-api-wikidata-item-adapter.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { PimClass } from "@dataspecer/core/pim/model"; -import { WikidataItemPhpWrap, loadWikidataEntityFromPhpWrapToResource } from "./php-api-wikidata-entity-adapter"; -import { IriProvider } from "@dataspecer/core/cim"; - - -export async function isWikidataItemPhp(entity: WikidataItemPhpWrap): Promise { - return entity.getEntityType() === 'item'; -} - -export async function loadWikidataItemFromPhpWrap( - entity: WikidataItemPhpWrap, - idProvider: IriProvider -): Promise { - const result = new PimClass(); - await loadWikidataEntityFromPhpWrapToResource(entity, idProvider, result); - result.pimIsCodelist = false; - result.pimExtends = unique(entity.getParents().map(idProvider.cimToPim)); - return result; -} - -function unique(values: T[]): T[] { - return [...new Set(values)]; -} diff --git a/packages/wikidata-experimental-adapter/src/entity-adapters/sparql-wikidata-association-attribute-adapter.ts b/packages/wikidata-experimental-adapter/src/entity-adapters/sparql-wikidata-association-attribute-adapter.ts deleted file mode 100644 index 0bb68fba1..000000000 --- a/packages/wikidata-experimental-adapter/src/entity-adapters/sparql-wikidata-association-attribute-adapter.ts +++ /dev/null @@ -1,100 +0,0 @@ -import { RdfSource, RdfSourceWrap } from "@dataspecer/core/core/adapter/rdf"; -import { PimAssociation, PimAssociationEnd, PimAttribute } from "@dataspecer/core/pim/model"; -import { loadWikidataEntityToResource } from "./sparql-wikidata-entity-adapter"; -import { RDFS, WIKIBASE, WIKIDATA } from "../vocabulary"; -import { IriProvider } from "@dataspecer/core/cim"; -import { CoreResource } from 
"@dataspecer/core/core"; - -async function isWikidataOutwardAssociation( - entity: RdfSourceWrap -): Promise { - return (await entity.types()).includes(WIKIDATA.valueTypeConstraint); -} - -async function isWikidataInwardAssociation( - entity: RdfSourceWrap -): Promise { - return (await entity.types()).includes(WIKIDATA.subjectTypeConstraint); -} - -async function isAssociationFakeAttribute( - entity: RdfSourceWrap -): Promise { - return (await entity.node(WIKIBASE.propertyType)) !== WIKIBASE.wikibaseItem; -} - -export async function loadWikidataAssociationOrAttribute( - rootCimIri: string, - entity: RdfSourceWrap, - source: RdfSource, - idProvider: IriProvider -): Promise<[CoreResource[], string[]]> { - if (await isAssociationFakeAttribute(entity)) { - const pimAttribute = await loadWikidataFakeAttribute(rootCimIri, entity, idProvider); - return [[pimAttribute],[]]; - } - - let coreResources: CoreResource[] = []; - let newClassesIris: string[] = []; - - if (await isWikidataOutwardAssociation(entity)) { - const possibleObjects = await entity.property(RDFS.range); - for await (const o of possibleObjects) { - coreResources.push(...(await loadWikidataAssociation("out", rootCimIri, o.value, entity, idProvider))) - newClassesIris.push(o.value); - } - } - - if (await isWikidataInwardAssociation(entity)) { - const possibleSubjects = await entity.property(RDFS.domain); - for await (const s of possibleSubjects) { - if (s.value === rootCimIri) - continue; - coreResources.push(...(await loadWikidataAssociation("in", s.value, rootCimIri, entity, idProvider))) - newClassesIris.push(s.value); - } - } - - - return [coreResources, newClassesIris]; -} - -async function loadWikidataAssociation( - inOrOut: "in" | "out", - startIri: string, - endIri: string, - entityAssociation: RdfSourceWrap, - idProvider: IriProvider -): Promise<[PimAssociationEnd, PimAssociation, PimAssociationEnd]> { - const mediates1 = new PimAssociationEnd(); - mediates1.iri = 
idProvider.cimToPim(entityAssociation.iri + "#end-1-" + inOrOut + "-" + getLastPartOfIri(startIri) + "-" + getLastPartOfIri(endIri)); - mediates1.pimPart = idProvider.cimToPim(startIri); - - const mediates2 = new PimAssociationEnd(); - mediates2.iri = idProvider.cimToPim(entityAssociation.iri + "#end-2-" + inOrOut + "-" + getLastPartOfIri(startIri) + "-" + getLastPartOfIri(endIri)); - mediates2.pimPart = idProvider.cimToPim(endIri); - - const association = new PimAssociation(); - await loadWikidataEntityToResource(entityAssociation, idProvider, association); - association.iri += "#edge-" + inOrOut + "-" + getLastPartOfIri(startIri) + "-" + getLastPartOfIri(endIri); - association.pimIsOriented = true; - association.pimEnd = [mediates1.iri, mediates2.iri]; - - return [mediates1, association, mediates2]; -} - -async function loadWikidataFakeAttribute( - rootCimIri: string, - entity: RdfSourceWrap, - idProvider: IriProvider -): Promise { - const result = new PimAttribute(); - await loadWikidataEntityToResource(entity, idProvider, result); - result.iri += "#fake-attribute-" + getLastPartOfIri(rootCimIri); - result.pimOwnerClass = idProvider.cimToPim(rootCimIri); - return result; -} - -function getLastPartOfIri(iri: string): string { - return iri.split("/").pop(); -} \ No newline at end of file diff --git a/packages/wikidata-experimental-adapter/src/entity-adapters/sparql-wikidata-entity-adapter.ts b/packages/wikidata-experimental-adapter/src/entity-adapters/sparql-wikidata-entity-adapter.ts deleted file mode 100644 index 4adbb83d6..000000000 --- a/packages/wikidata-experimental-adapter/src/entity-adapters/sparql-wikidata-entity-adapter.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { RdfObject, RdfSourceWrap } from "@dataspecer/core/core/adapter/rdf"; -import { PimResource } from "@dataspecer/core/pim/model"; -import { LanguageString } from "@dataspecer/core/core"; -import { RDFS, SCHEMA } from "../vocabulary"; -import { IriProvider } from "@dataspecer/core/cim"; - -export 
async function loadWikidataEntityToResource( - entity: RdfSourceWrap, - idProvider: IriProvider, - resource: PimResource -): Promise { - const prefLabel = await entity.property(RDFS.label); - resource.pimHumanLabel = rdfObjectsToLanguageString(prefLabel); - const definition = await entity.property(SCHEMA.description); - resource.pimHumanDescription = rdfObjectsToLanguageString(definition); - resource.pimInterpretation = entity.iri; - resource.iri = idProvider.cimToPim(resource.pimInterpretation); -} - -function rdfObjectsToLanguageString(objects: RdfObject[]): LanguageString { - return Object.fromEntries(objects.map((o) => [o.language, o.value])); -} diff --git a/packages/wikidata-experimental-adapter/src/entity-adapters/sparql-wikidata-item-adapter.ts b/packages/wikidata-experimental-adapter/src/entity-adapters/sparql-wikidata-item-adapter.ts deleted file mode 100644 index 8e83b718d..000000000 --- a/packages/wikidata-experimental-adapter/src/entity-adapters/sparql-wikidata-item-adapter.ts +++ /dev/null @@ -1,31 +0,0 @@ - -import { RdfSourceWrap } from "@dataspecer/core/core/adapter/rdf"; -import { PimClass } from "@dataspecer/core/pim/model"; -import { RDFS, WIKIDATA } from "../vocabulary"; -import { loadWikidataEntityToResource } from "./sparql-wikidata-entity-adapter"; -import { IriProvider } from "@dataspecer/core/cim"; - -export async function isWikidataItem(entity: RdfSourceWrap): Promise { - return (await entity.types()).includes(WIKIDATA.item); -} - -export async function loadWikidataItem( - entity: RdfSourceWrap, - idProvider: IriProvider - ): Promise { - - const result = new PimClass(); - await loadWikidataEntityToResource(entity, idProvider, result); - - result.pimIsCodelist = false; - result.pimExtends = unique([ - ...result.pimExtends, - ...(await entity.nodes(RDFS.subClassOf)).map(idProvider.cimToPim), - ]); - - return result; - } - - function unique(values: T[]): T[] { - return [...new Set(values)]; - } \ No newline at end of file diff --git 
// ---------------------------------------------------------------------------
// wd-class-adapter.ts
// ---------------------------------------------------------------------------

/**
 * Builds a `PimClass` from a Wikidata class.
 *
 * The common entity fields are filled by `loadWikidataEntityToResource`;
 * `pimExtends` is set to the PIM IRIs of the classes listed in `subclassOf`.
 */
export function loadWikidataClass(
    entity: IWdClass,
    iriProvider: IriProvider
): PimClass {
    const result = new PimClass();
    loadWikidataEntityToResource(entity, EntityTypes.CLASS, iriProvider, result);
    result.pimIsCodelist = false;
    // Call through a lambda: handing `iriProvider.cimToPim` to `map` directly
    // detaches it from `iriProvider` and also passes it map's index/array arguments.
    result.pimExtends = entityIdsToCimIds(entity.subclassOf, EntityTypes.CLASS)
        .map((cimIri) => iriProvider.cimToPim(cimIri));
    return result;
}

// ---------------------------------------------------------------------------
// wd-entity-adapter.ts
// ---------------------------------------------------------------------------

/** Returns the CIM IRI of an entity: the entity prefix followed by the prefixed id. */
export function entityIdToCimIri(entityId: EntityId, type: EntityTypes): string {
    return WIKIDATA_ENTITY_PREFIX + addEntityPrefixToId(entityId, type);
}

/** Prepends `"Q"` to the id of a class and `"P"` to the id of any other entity type. */
export function addEntityPrefixToId(entityId: EntityId, type: EntityTypes): string {
    const prefix = type === EntityTypes.CLASS ? "Q" : "P";
    return prefix + entityId.toString();
}

/** Maps every id in the list to its CIM IRI, keeping the order. */
export function entityIdsToCimIds(entityIds: EntityIdsList, type: EntityTypes): string[] {
    return entityIds.map((id) => entityIdToCimIri(id, type));
}

/**
 * Extracts the numeric id from a CIM IRI by taking the last `/`-separated
 * segment and dropping its first character (the one-letter prefix).
 *
 * @returns The numeric id, or `NaN` when what follows the prefix is not a
 *          plain run of digits (for example an IRI ending with `/`), so a
 *          malformed IRI can no longer be mistaken for entity `0`.
 */
export function cimIriToEntityId(cimIri: string): EntityId {
    const lastSegment = cimIri.split("/").pop() ?? "";
    const digits = lastSegment.slice(1);
    return /^\d+$/.test(digits) ? Number(digits) : NaN;
}

/**
 * Copies the fields shared by all Wikidata entities into a PIM resource:
 * labels, descriptions, the CIM IRI as `pimInterpretation`, and the PIM IRI
 * derived from it as `iri`.
 */
export function loadWikidataEntityToResource(
    entity: IWdEntity,
    type: EntityTypes,
    iriProvider: IriProvider,
    resource: PimResource,
): void {
    const cimIri = entityIdToCimIri(entity.id, type);
    resource.pimHumanLabel = entity.labels;
    resource.pimHumanDescription = entity.descriptions;
    resource.pimInterpretation = cimIri;
    resource.iri = iriProvider.cimToPim(cimIri);
}

// ---------------------------------------------------------------------------
// wd-property-adapter.ts
// ---------------------------------------------------------------------------

export type associationTypes = "inward" | "outward";

/** A property is treated as an attribute when its underlying type is not an entity. */
function isPropertyAttribute(wdProperty: IWdProperty): boolean {
    return wdProperty.underlyingType !== UnderlyingType.ENTITY;
}

/** Merges id lists into one list without repeats, keeping first-seen order. */
function uniqueEntityIds(first: EntityIdsList, second: EntityIdsList): EntityId[] {
    return [...new Set<EntityId>([...first, ...second])];
}

/**
 * Appends to `storage` one association (with both ends) from the root class to
 * every class named by the property's value-type constraint.
 *
 * Does nothing when the property has no item constraints. An id listed in both
 * `instanceOf` and `subclassOfInstanceOf` is emitted once, because both
 * occurrences would produce resources with identical IRIs.
 */
function loadOutwardAssociations(storage: CoreResource[], inOrOut: associationTypes, wdProperty: IWdProperty, rootClass: IWdClass, iriProvider: IriProvider): void {
    const valueType = wdProperty.itemConstraints?.valueType;
    if (valueType == null) {
        return;
    }
    // NOTE(review): `valueType.subclassOf` is not consulted here - confirm that is intended.
    for (const objectId of uniqueEntityIds(valueType.instanceOf, valueType.subclassOfInstanceOf)) {
        storage.push(...loadWikidataAssociation(inOrOut, rootClass.id, objectId, wdProperty, iriProvider));
    }
}

/**
 * Appends to `storage` one association (with both ends) into the root class
 * from every class named by the property's subject-type constraint.
 *
 * An id listed in both `instanceOf` and `subclassOfInstanceOf` is emitted once.
 */
function loadInwardAssociations(storage: CoreResource[], inOrOut: associationTypes, wdProperty: IWdProperty, rootClass: IWdClass, iriProvider: IriProvider): void {
    const subjectType = wdProperty.generalConstraints.subjectType;
    // NOTE(review): `subjectType.subclassOf` is not consulted here - confirm that is intended.
    for (const subjectId of uniqueEntityIds(subjectType.instanceOf, subjectType.subclassOfInstanceOf)) {
        storage.push(...loadWikidataAssociation(inOrOut, subjectId, rootClass.id, wdProperty, iriProvider));
    }
}

/**
 * Converts a Wikidata property seen from `rootClass` into PIM resources.
 *
 * - A property whose underlying type is not an entity becomes a single attribute
 *   owned by the root class, regardless of `inOrOut`.
 * - Otherwise `"outward"` yields associations from the root class (none when the
 *   property has no item constraints) and `"inward"` yields associations into it.
 *
 * @returns The created resources; for associations each one is preceded and
 *          followed by its two ends.
 */
export function loadWikidataProperty(inOrOut: associationTypes, wdProperty: IWdProperty, rootClass: IWdClass, iriProvider: IriProvider): CoreResource[] {
    if (isPropertyAttribute(wdProperty)) {
        return [loadWikidataFakeAttribute(wdProperty, rootClass, iriProvider)];
    }

    const coreResources: CoreResource[] = [];
    if (inOrOut === "outward" && wdProperty.itemConstraints != null) {
        loadOutwardAssociations(coreResources, inOrOut, wdProperty, rootClass, iriProvider);
    } else if (inOrOut === "inward") {
        loadInwardAssociations(coreResources, inOrOut, wdProperty, rootClass, iriProvider);
    }

    return coreResources;
}

/**
 * Creates one oriented association between two classes together with its ends.
 *
 * The association IRI is the property's PIM IRI suffixed with the direction and
 * both class ids; the end IRIs add `-end-1` (start) and `-end-2` (end) to it.
 *
 * @returns `[startEnd, association, endEnd]`.
 */
function loadWikidataAssociation(inOrOut: associationTypes, startEntityId: EntityId, endEntityId: EntityId, wdProperty: IWdProperty, iriProvider: IriProvider): [PimAssociationEnd, PimAssociation, PimAssociationEnd] {
    const pimAssociation = new PimAssociation();
    loadWikidataEntityToResource(wdProperty, EntityTypes.PROPERTY, iriProvider, pimAssociation);
    pimAssociation.pimIsOriented = true;
    pimAssociation.iri += "-edge-" + inOrOut + "-" + startEntityId + "-" + endEntityId;

    // Subject of property
    const mediates1 = new PimAssociationEnd();
    mediates1.iri = pimAssociation.iri + "-end-1";
    mediates1.pimPart = iriProvider.cimToPim(entityIdToCimIri(startEntityId, EntityTypes.CLASS));

    // Object of property
    const mediates2 = new PimAssociationEnd();
    mediates2.iri = pimAssociation.iri + "-end-2";
    mediates2.pimPart = iriProvider.cimToPim(entityIdToCimIri(endEntityId, EntityTypes.CLASS));

    // Assign mediates to the association endpoints
    pimAssociation.pimEnd = [mediates1.iri, mediates2.iri];

    return [mediates1, pimAssociation, mediates2];
}

/**
 * Creates an attribute for a non-entity property, owned by the root class.
 * Its IRI is the property's PIM IRI suffixed with `#fake-attribute-` and the
 * last segment of the root class CIM IRI.
 */
function loadWikidataFakeAttribute(wdProperty: IWdProperty, rootClass: IWdClass, iriProvider: IriProvider): PimAttribute {
    const pimAttribute = new PimAttribute();
    const rootClassCimIri = entityIdToCimIri(rootClass.id, EntityTypes.CLASS);
    loadWikidataEntityToResource(wdProperty, EntityTypes.PROPERTY, iriProvider, pimAttribute);
    pimAttribute.iri += "#fake-attribute-" + getLastPartOfIri(rootClassCimIri);
    pimAttribute.pimOwnerClass = iriProvider.cimToPim(rootClassCimIri);
    return pimAttribute;
}

/** Returns the text after the last `/` of the IRI (the whole IRI when it has no `/`). */
function getLastPartOfIri(iri: string): string {
    return iri.split("/").pop() ?? "";
}
- FILTER(LANG(?itemDescription) = "en") - } - OPTIONAL { - ?item p:P279 ?statement . - ?statement ps:P279 ?subClassOf . - } -} \ No newline at end of file diff --git a/packages/wikidata-experimental-adapter/src/sparql-queries/get-full-hierarchy-children.sparql b/packages/wikidata-experimental-adapter/src/sparql-queries/get-full-hierarchy-children.sparql deleted file mode 100644 index eea48bd62..000000000 --- a/packages/wikidata-experimental-adapter/src/sparql-queries/get-full-hierarchy-children.sparql +++ /dev/null @@ -1,19 +0,0 @@ -PREFIX rdfs: -PREFIX schema: -PREFIX skos: -PREFIX xsd: - -CONSTRUCT { - ?item a wd:Q16222597 ; - rdfs:label ?itemLabel ; - schema:description ?itemDescription ; - rdfs:subClassOf ?linkTo . -} WHERE { - ?item wdt:P279* %CLASS% - OPTIONAL { ?item wdt:P279 ?linkTo } - OPTIONAL { - ?item schema:description ?itemDescription . - FILTER(LANG(?itemDescription) = "en") - } - SERVICE wikibase:label {bd:serviceParam wikibase:language "en" } -} \ No newline at end of file diff --git a/packages/wikidata-experimental-adapter/src/sparql-queries/get-full-hierarchy-parents.sparql b/packages/wikidata-experimental-adapter/src/sparql-queries/get-full-hierarchy-parents.sparql deleted file mode 100644 index e0cb2db08..000000000 --- a/packages/wikidata-experimental-adapter/src/sparql-queries/get-full-hierarchy-parents.sparql +++ /dev/null @@ -1,19 +0,0 @@ -PREFIX rdfs: -PREFIX schema: -PREFIX skos: -PREFIX xsd: - -CONSTRUCT { - ?item a wd:Q16222597 ; - rdfs:label ?itemLabel ; - schema:description ?itemDescription ; - rdfs:subClassOf ?linkTo . -} WHERE { - %CLASS% wdt:P279* ?item - OPTIONAL { ?item wdt:P279 ?linkTo } - OPTIONAL { - ?item schema:description ?itemDescription . 
- FILTER(LANG(?itemDescription) = "en") - } - SERVICE wikibase:label {bd:serviceParam wikibase:language "en" } -} \ No newline at end of file diff --git a/packages/wikidata-experimental-adapter/src/sparql-queries/get-surroundings-children.sparql b/packages/wikidata-experimental-adapter/src/sparql-queries/get-surroundings-children.sparql deleted file mode 100644 index 58eafda00..000000000 --- a/packages/wikidata-experimental-adapter/src/sparql-queries/get-surroundings-children.sparql +++ /dev/null @@ -1,21 +0,0 @@ -PREFIX rdfs: -PREFIX schema: -PREFIX skos: -PREFIX xsd: - -CONSTRUCT { - ?child a wd:Q16222597 ; - rdfs:subClassOf ?linkTo ; - rdfs:label ?childLabel ; - schema:description ?childDescription . -} WHERE { - ?child wdt:P279 %CLASS% - OPTIONAL { - ?child wdt:P279 ?linkTo - } - OPTIONAL { - ?child schema:description ?childDescription . - FILTER(LANG(?childDescription) = "en") - } - SERVICE wikibase:label {bd:serviceParam wikibase:language "en" } -} \ No newline at end of file diff --git a/packages/wikidata-experimental-adapter/src/sparql-queries/get-surroundings-inward-associations.sparql b/packages/wikidata-experimental-adapter/src/sparql-queries/get-surroundings-inward-associations.sparql deleted file mode 100644 index d7900f082..000000000 --- a/packages/wikidata-experimental-adapter/src/sparql-queries/get-surroundings-inward-associations.sparql +++ /dev/null @@ -1,30 +0,0 @@ -PREFIX rdfs: -PREFIX schema: -PREFIX skos: -PREFIX xsd: - -CONSTRUCT { - <__search_results> <__has_search_results> ?property . - ?property a wd:Q21503250 ; - rdfs:label ?propertyLabel ; - schema:description ?propertyDescription ; - rdfs:domain ?subjectConstraint ; - wikibase:propertyType ?type . -} WHERE { - ?property p:P2302 ?statement ; - wikibase:propertyType ?type . - ?statement ps:P2302 wd:Q21510865 ; - pq:P2308 %CLASS% . - OPTIONAL { - ?property p:P2302 ?statementSecond . - ?statementSecond ps:P2302 wd:Q21503250 ; - pq:P2308 ?subjectConstraint . 
- } - OPTIONAL { - ?property schema:description ?propertyDescription . - FILTER(LANG(?propertyDescription) = "en") - } - SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } -} - - diff --git a/packages/wikidata-experimental-adapter/src/sparql-queries/get-surroundings-outward-associations.sparql b/packages/wikidata-experimental-adapter/src/sparql-queries/get-surroundings-outward-associations.sparql deleted file mode 100644 index 103ac5f59..000000000 --- a/packages/wikidata-experimental-adapter/src/sparql-queries/get-surroundings-outward-associations.sparql +++ /dev/null @@ -1,29 +0,0 @@ -PREFIX rdfs: -PREFIX schema: -PREFIX skos: -PREFIX xsd: - -CONSTRUCT { - <__search_results> <__has_search_results> ?property . - ?property a wd:Q21510865 ; - rdfs:label ?propertyLabel ; - schema:description ?propertyDescription ; - rdfs:range ?valueConstraint ; - wikibase:propertyType ?type . -} WHERE { - ?property p:P2302 ?statement ; - wikibase:propertyType ?type . - ?statement ps:P2302 wd:Q21503250 ; - pq:P2308 %CLASS% . - OPTIONAL { - ?property p:P2302 ?statementSecond . - ?statementSecond ps:P2302 wd:Q21510865 ; - pq:P2308 ?valueConstraint . - } - OPTIONAL { - ?property schema:description ?propertyDescription . - FILTER(LANG(?propertyDescription) = "en") - } - SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } -} - diff --git a/packages/wikidata-experimental-adapter/src/sparql-queries/get-surroundings-parents.sparql b/packages/wikidata-experimental-adapter/src/sparql-queries/get-surroundings-parents.sparql deleted file mode 100644 index c4b2a842b..000000000 --- a/packages/wikidata-experimental-adapter/src/sparql-queries/get-surroundings-parents.sparql +++ /dev/null @@ -1,26 +0,0 @@ -PREFIX rdfs: -PREFIX schema: -PREFIX skos: -PREFIX xsd: - -CONSTRUCT { - %CLASS% rdfs:subClassOf ?parent . - ?parent a wd:Q16222597 ; - rdfs:label ?parentLabel ; - schema:description ?parentDescription ; - rdfs:subClassOf ?parentOfParent . 
-} WHERE { - %CLASS% p:P279 ?statement . - ?statement ps:P279 ?parent . - SERVICE wikibase:label { - bd:serviceParam wikibase:language "en". - } - OPTIONAL { - ?parent schema:description ?parentDescription . - FILTER(LANG(?parentDescription) = "en") - } - OPTIONAL { - ?parent p:P279 ?parentStatement . - ?parentStatement ps:P279 ?parentOfParent . - } -} \ No newline at end of file diff --git a/packages/wikidata-experimental-adapter/src/sparql-queries/search.sparql b/packages/wikidata-experimental-adapter/src/sparql-queries/search.sparql deleted file mode 100644 index 9bf5f532f..000000000 --- a/packages/wikidata-experimental-adapter/src/sparql-queries/search.sparql +++ /dev/null @@ -1,34 +0,0 @@ -PREFIX rdfs: -PREFIX schema: -PREFIX skos: -PREFIX xsd: - -CONSTRUCT { - <__search_results> <__has_search_result> ?item . - ?item - <__order> ?num ; - rdfs:label ?itemLabel ; - schema:description ?itemDescription ; - rdfs:subClassOf ?subClassOf . -} WHERE { - SERVICE wikibase:mwapi { - bd:serviceParam wikibase:endpoint "www.wikidata.org" ; - wikibase:api "EntitySearch" ; - mwapi:search %QUERY% ; - mwapi:language "en" ; - mwapi:limit 20 . - ?item wikibase:apiOutputItem mwapi:item. - ?num wikibase:apiOrdinal true. - } - SERVICE wikibase:label { - bd:serviceParam wikibase:language "en" . - } - OPTIONAL { - ?item schema:description ?itemDescription . - FILTER(LANG(?itemDescription) = "en") - } - OPTIONAL { - ?item p:P279 ?statement . - ?statement ps:P279 ?subClassOf . 
- } -} ORDER BY ASC(?num) \ No newline at end of file diff --git a/packages/wikidata-experimental-adapter/src/vocabulary.ts b/packages/wikidata-experimental-adapter/src/vocabulary.ts index e27c58e97..03f9ee58d 100644 --- a/packages/wikidata-experimental-adapter/src/vocabulary.ts +++ b/packages/wikidata-experimental-adapter/src/vocabulary.ts @@ -41,20 +41,4 @@ export const SCHEMA = { description: "http://schema.org/description", } -export const WIKIDATA_SPARQL_FREE_VAR_PREFIX = "http://query.wikidata.org/bigdata/namespace/wdq/"; - -export const WIKIDATA_ENTITY_PREFIX = "http://www.wikidata.org/entity/"; - -export const WIKIDATA = { - item: WIKIDATA_ENTITY_PREFIX + "Q16222597", - valueTypeConstraint: WIKIDATA_ENTITY_PREFIX + "Q21510865", - subjectTypeConstraint: WIKIDATA_ENTITY_PREFIX + "Q21503250", - subclassOf: WIKIDATA_ENTITY_PREFIX + "P279", -} - -export const WIKIBASE_TYPE_PREFIX = "http://wikiba.se/ontology#"; - -export const WIKIBASE = { - wikibaseItem: WIKIBASE_TYPE_PREFIX + "WikibaseItem", - propertyType: WIKIBASE_TYPE_PREFIX + "propertyType", -} +export const WIKIDATA_ENTITY_PREFIX = "http://www.wikidata.org/entity/"; \ No newline at end of file diff --git a/packages/wikidata-experimental-adapter/src/wikidata-adapter.ts b/packages/wikidata-experimental-adapter/src/wikidata-adapter.ts index 81978f521..9a11df9a3 100644 --- a/packages/wikidata-experimental-adapter/src/wikidata-adapter.ts +++ b/packages/wikidata-experimental-adapter/src/wikidata-adapter.ts @@ -1,64 +1,26 @@ -import jsStringEscape from "js-string-escape"; -import search from "./sparql-queries/search.sparql"; -import getClass from "./sparql-queries/get-class.sparql"; -import getSurroundingsParents from "./sparql-queries/get-surroundings-parents.sparql"; -import getSurroundingsChildren from "./sparql-queries/get-surroundings-children.sparql"; -import getSurroundingsOutwardAssociations from "./sparql-queries/get-surroundings-outward-associations.sparql"; -import getSurroundingsInwardAssociations 
from "./sparql-queries/get-surroundings-inward-associations.sparql"; -import getFullHierarchyChildren from "./sparql-queries/get-full-hierarchy-children.sparql"; -import getFullHierarchyParents from "./sparql-queries/get-full-hierarchy-parents.sparql"; import {CimAdapter, IriProvider} from "@dataspecer/core/cim"; import {HttpFetch} from "@dataspecer/core/io/fetch/fetch-api"; -import {OFN, XSD, WIKIDATA_SPARQL_FREE_VAR_PREFIX, RDFS} from "./vocabulary"; +import {OFN, XSD} from "./vocabulary"; import {PimClass} from "@dataspecer/core/pim/model/pim-class"; import {CoreResource, ReadOnlyMemoryStore} from "@dataspecer/core/core"; import {CoreResourceReader} from "@dataspecer/core/core/core-reader"; -import { RdfSource, RdfSourceWrap } from "@dataspecer/core/core/adapter/rdf"; -import { SparqlQueryRdfSource } from "@dataspecer/core/io/rdf/sparql/sparql-query-rdf-source"; -import { loadWikidataItem, isWikidataItem } from "./entity-adapters/sparql-wikidata-item-adapter"; -import { FederatedSource } from "@dataspecer/core/io/rdf/federated/federated-rdf-source"; -import { loadWikidataAssociationOrAttribute } from "./entity-adapters/sparql-wikidata-association-attribute-adapter"; -import { WikidataPhpGetEntities } from "./wikidata-php-api/wikidata-php-api-get-entities"; -import { isWikidataItemPhp, loadWikidataItemFromPhpWrap } from "./entity-adapters/php-api-wikidata-item-adapter"; -import { loadWikidataEntityFromPhpWrapToResource } from "./entity-adapters/php-api-wikidata-entity-adapter"; - -const getSurroundingsParentsAndChilren = [ - getClass, - getSurroundingsChildren, - getSurroundingsParents, -]; - -const getSurroundingsAssociations = [ - getSurroundingsOutwardAssociations, - getSurroundingsInwardAssociations, -] - -const getFullHierarchy = [ - getClass, - getFullHierarchyParents, - getFullHierarchyChildren, -] - -const searchQuery = (searchString: string) => search({query: `"${jsStringEscape(searchString)}"`}); - -const getClassQuery = (cimIri: string) => 
getClass({class: `<${cimIri}>`}); - -const getSurroundingsParentsAndChilrenQuery = (cimIri: string) => getSurroundingsParentsAndChilren.map(q => q({class: `<${cimIri}>`})); - -const getFullHierarchyQuery = (cimIri: string) => getFullHierarchy.map(q => q({class: `<${cimIri}>`})); - -const getSurroundingsAssociationsQuery = (cimIri: string) => getSurroundingsAssociations.map(q => q({class: `<${cimIri}>`})); - - -const IRI_REGEXP = new RegExp("^http://www.wikidata.org/entity/Q[1-9][0-9]*$"); +import { WdConnector } from "./connector/wd-connector"; +import { IHierarchyResponse, ISurroundingsResponse } from "./connector/response"; +import { loadWikidataClass } from "./entity-adapters/wd-class-adapter"; +import { cimIriToEntityId } from "./entity-adapters/wd-entity-adapter"; +import { EntityId } from "./connector/entities/wd-entity"; +import { IWdClass } from "./connector/entities/wd-class"; +import { IWdProperty } from "./connector/entities/wd-property"; +import { associationTypes, loadWikidataProperty } from "./entity-adapters/wd-property-adapter"; export class WikidataAdapter implements CimAdapter { - protected readonly WIKIDATA_SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"; protected readonly httpFetch: HttpFetch; protected iriProvider!: IriProvider; + protected readonly connector: WdConnector; constructor(httpFetch: HttpFetch) { this.httpFetch = httpFetch; + this.connector = new WdConnector(this.httpFetch); } setIriProvider(iriProvider: IriProvider): void { @@ -88,198 +50,125 @@ export class WikidataAdapter implements CimAdapter { return undefined; } - // @todo implement async search(searchString: string): Promise { if (!this.iriProvider) { throw new Error("Missing IRI provider."); } - const source = new SparqlQueryRdfSource( - this.httpFetch, - this.WIKIDATA_SPARQL_ENDPOINT, - searchQuery(searchString) - ); - await source.query(); - - const results = await source.property( - this.varToWikidataSparqlVar("__search_results"), - 
this.varToWikidataSparqlVar("__has_search_result") - ); - - let sorted = []; - for await (const result of results) { - const resultWrap = RdfSourceWrap.forIri(result.value, source); - sorted.push({ - sort: Number((await resultWrap.property(this.varToWikidataSparqlVar("__order")))[0].value), - cls: await loadWikidataItem(resultWrap, this.iriProvider), - }); - } - sorted = sorted.sort((a, b) => a.sort - b.sort).map((p) => p.cls); - - if (IRI_REGEXP.test(searchString)) { - const classByIri = await this.getClass(searchString); - if (classByIri) { - sorted = [classByIri]; - } else sorted = [] + const results = [] + const searchResponse = await this.connector.search(searchString); + if (searchResponse != null) { + for (const cls of searchResponse.results.classes) { + const newPimClass = loadWikidataClass(cls, this.iriProvider); + results.push(newPimClass); + } } - - return sorted; + return results; } - // @todo implement async getClass(cimIri: string): Promise { if (!this.iriProvider) { throw new Error("Missing IRI provider."); } - const source = new SparqlQueryRdfSource( - this.httpFetch, - this.WIKIDATA_SPARQL_ENDPOINT, - getClassQuery(cimIri) - ); - await source.query(); - - const resultWrap = RdfSourceWrap.forIri(cimIri, source); - if (!(await isWikidataItem(resultWrap))) { - return null; + const getClassResponse = await this.connector.getClass(cimIriToEntityId(cimIri)); + if (getClassResponse != null && getClassResponse.results.classes.length != 0) { + const cls = getClassResponse.results.classes[0]; + return loadWikidataClass(cls, this.iriProvider); } - return await loadWikidataItem(resultWrap, this.iriProvider); + return null; } - - //http://www.wikidata.org/entity/Q5 - // @todo implement - async getSurroundings(cimIri: string): Promise { + + async getFullHierarchy(cimIri: string): Promise { if (!this.iriProvider) { throw new Error("Missing IRI provider."); } - - const [associationsSources, associationsFinishPromise] = - this.createGroupQuerySparqlSources(cimIri, 
getSurroundingsAssociationsQuery); - const [parentsChildrenSources, parentsChildrenFinishPromise] = - this.createGroupQuerySparqlSources(cimIri, getSurroundingsParentsAndChilrenQuery); - - // Work on parents because associations queries take longer. - await parentsChildrenFinishPromise; - const parentsChildrenSource = FederatedSource.createExhaustive(parentsChildrenSources); - let resources = await this.loadChildrenAndParentsFromEntity(cimIri, parentsChildrenSource); - - await associationsFinishPromise; - const associationsSource = FederatedSource.createExhaustive(associationsSources); - resources = await this.loadAssociationsAndItsTypesFromEntity(cimIri, associationsSource, resources); - return ReadOnlyMemoryStore.create(resources); + const hierarchyResponse = await this.connector.hierarchy(cimIriToEntityId(cimIri)); + if (hierarchyResponse != null) { + const resources = this.loadParentsChildrenHierarchy(hierarchyResponse); + return ReadOnlyMemoryStore.create(resources); + } + return ReadOnlyMemoryStore.create({}); } - async getFullHierarchy(cimIri: string): Promise { + // @todo implement + async getSurroundings(cimIri: string): Promise { if (!this.iriProvider) { throw new Error("Missing IRI provider."); } - const sources = getFullHierarchyQuery(cimIri).map( - (query) => new SparqlQueryRdfSource(this.httpFetch, this.WIKIDATA_SPARQL_ENDPOINT, query) - ); - await Promise.all(sources.map((q) => q.query())); - const source = FederatedSource.createExhaustive(sources); - const resources = await this.loadChildrenAndParentsFromEntity(cimIri, source); - return ReadOnlyMemoryStore.create(resources); + const surroundingsResponse = await this.connector.surroundings(cimIriToEntityId(cimIri)); + if (surroundingsResponse != null) { + const resources = this.loadSurroundings(surroundingsResponse); + return ReadOnlyMemoryStore.create(resources); + } + return ReadOnlyMemoryStore.create({}); } + async getResourceGroup(cimIri: string): Promise { // Keep as is return []; } - protected 
varToWikidataSparqlVar(variable: string): string { - return WIKIDATA_SPARQL_FREE_VAR_PREFIX + variable; - } + private loadParentsChildrenHierarchy(hierarchyResponse: IHierarchyResponse): { [iri: string]: CoreResource } { + // Outputs + const resources: { [iri: string]: CoreResource } = {} + const loadedClassesSet = new Set(); - protected createGroupQuerySparqlSources( - cimIri: string, - groupQuery: (string) => string[] - ): [SparqlQueryRdfSource[], Promise] { - const sources = groupQuery(cimIri).map( - (query) => new SparqlQueryRdfSource(this.httpFetch, this.WIKIDATA_SPARQL_ENDPOINT, query) - ); - const finishPromise = Promise.all(sources.map((q) => q.query())); - return [sources, finishPromise]; - } + // Load root + this.tryLoadClassesToResources([hierarchyResponse.results.root], resources, loadedClassesSet); - protected async loadChildrenAndParentsFromEntity( - rootClassCimIri: string, - source: RdfSource - ): Promise<{ [iri: string]: CoreResource }> { - const resources: { [iri: string]: CoreResource } = {}; - - const classesProcessed = new Set(); - let cimIrisToProcess = [rootClassCimIri]; - while (cimIrisToProcess.length) { - const processedCimIri = cimIrisToProcess.pop(); - if (classesProcessed.has(processedCimIri)) { - continue; - } - classesProcessed.add(processedCimIri); - const rdfClassWrap = RdfSourceWrap.forIri(processedCimIri, source); - if (!(await isWikidataItem(rdfClassWrap))) { - continue; - } - - const pimClass = await loadWikidataItem(rdfClassWrap, this.iriProvider); - - const parentsCimIris = (await source.property(processedCimIri, RDFS.subClassOf)).map((r) => r.value); - const childrenCimIris = (await source.reverseProperty(RDFS.subClassOf, processedCimIri)).map((r) => r.value); - cimIrisToProcess = [...cimIrisToProcess, ...parentsCimIris, ...childrenCimIris]; - - resources[pimClass.iri] = pimClass; - } - return resources; - } + // Load parents + this.tryLoadClassesToResources(hierarchyResponse.results.parents, resources, loadedClassesSet); - 
protected async loadAssociationsAndItsTypesFromEntity( - rootClassCimIri: string, - source: RdfSource, - resources: { [iri: string]: CoreResource } - ): Promise<{ [iri: string]: CoreResource }> { - const processedCLasses = new Set(...Object.keys(resources).map(this.iriProvider.pimToCim)); - const classCimIrisToProcess: string[] = []; + // Load children + this.tryLoadClassesToResources(hierarchyResponse.results.children, resources, loadedClassesSet); - const associationsResults = await source.property( - this.varToWikidataSparqlVar("__search_results"), - this.varToWikidataSparqlVar("__has_search_results") - ); + return resources; + } - for await (const result of associationsResults) { - const rdfResultWrap = RdfSourceWrap.forIri(result.value, source); - const [coreResources, newClassIrisToProcess]: [CoreResource[], string[]] - = await loadWikidataAssociationOrAttribute(rootClassCimIri, rdfResultWrap, source, this.iriProvider); - coreResources.forEach((r) => resources[r.iri] = r); - newClassIrisToProcess.forEach((c) => { - if (!processedCLasses.has(c)) { - classCimIrisToProcess.push(c); - processedCLasses.add(c); - } - }) - } + private loadSurroundings(surroundingsResponse: ISurroundingsResponse): { [iri: string]: CoreResource } { + const resources: { [iri: string]: CoreResource } = {} + const loadedClassesSet = new Set(); + const loadedOutwardsPropertiesSet = new Set(); + const loadedInwardPropertiesSet = new Set(); - console.log(1); - const newPimClasses = await this.getClasses(classCimIrisToProcess); - newPimClasses.forEach((r) => resources[r.iri] = r); - console.log(2); + // Load root + const rootClass = surroundingsResponse.results.root + this.tryLoadClassesToResources([rootClass], resources, loadedClassesSet); + + // Load classes from endpoints + this.tryLoadClassesToResources(surroundingsResponse.results.propertyEndpoints, resources, loadedClassesSet); + + // Load subjectOf properties + this.tryLoadPropertiesToResources("outward", 
surroundingsResponse.results.subjectOf, rootClass, resources, loadedOutwardsPropertiesSet); + + // Load valueOf properties + this.tryLoadPropertiesToResources("inward", surroundingsResponse.results.valueOf, rootClass, resources, loadedInwardPropertiesSet); return resources; } - protected async getClasses(cimIris: string[]): Promise { - if (!this.iriProvider) { - throw new Error("Missing IRI provider."); - } - let newPimClasses: PimClass[] = [] + private tryLoadClassesToResources(wdClasses: IWdClass[], resources: { [iri: string]: CoreResource }, loadedClassesSet: Set): void { + for (const cls of wdClasses) { + if (!loadedClassesSet.has(cls.id)) { + loadedClassesSet.add(cls.id); + const newPimClass = loadWikidataClass(cls, this.iriProvider) + resources[newPimClass.iri] = newPimClass; + } + } + } - const results = await WikidataPhpGetEntities(this.httpFetch, cimIris); - for await (const entityWrap of results) { - if (await isWikidataItemPhp(entityWrap)) { - const cls = await loadWikidataItemFromPhpWrap(entityWrap, this.iriProvider); - newPimClasses.push(cls); - } + private tryLoadPropertiesToResources(inOrOut: associationTypes, wdProperties: IWdProperty[], rootClass: IWdClass, resources: { [iri: string]: CoreResource }, loadedPropertiesSet: Set): void { + for (const prop of wdProperties) { + if (!loadedPropertiesSet.has(prop.id)) { + loadedPropertiesSet.add(prop.id); + const coreResources: CoreResource[] = loadWikidataProperty(inOrOut, prop, rootClass, this.iriProvider) + for (const resource of coreResources) { + resources[resource.iri] = resource; + } + } } - return newPimClasses; } } diff --git a/packages/wikidata-experimental-adapter/src/wikidata-php-api/wikidata-php-api-get-entities.ts b/packages/wikidata-experimental-adapter/src/wikidata-php-api/wikidata-php-api-get-entities.ts deleted file mode 100644 index 9541a79b2..000000000 --- a/packages/wikidata-experimental-adapter/src/wikidata-php-api/wikidata-php-api-get-entities.ts +++ /dev/null @@ -1,41 +0,0 @@ 
-import { HttpFetch } from "@dataspecer/core/io/fetch/fetch-api"; -import { WikidataItemPhpWrap } from "../entity-adapters/php-api-wikidata-entity-adapter"; - -const WIKIDATA_PHP_API_GET_ENTITIES_URL: string = "https://www.wikidata.org/w/api.php?action=wbgetentities&languages=en&format=json&origin=*&props=labels|descriptions|claims|datatype"; - -export async function WikidataPhpGetEntities(httpFetch: HttpFetch, cimIris: string[]): Promise { - let results: WikidataItemPhpWrap[] = []; - let classesToQuery: string[] = [] - for await (const [i, cimIri] of cimIris.entries()) { - classesToQuery.push(cimIri); - if (classesToQuery.length === 49 || i + 1 === cimIris.length) { - results.push(...(await WikidataPhpGetEntitiesFetch49(httpFetch, classesToQuery))); - classesToQuery = []; - } - } - return results; -} - -async function WikidataPhpGetEntitiesFetch49(httpFetch: HttpFetch, cimIris: string[]): Promise { - const idsList: string = constructIdsList(cimIris); - const url: string = WIKIDATA_PHP_API_GET_ENTITIES_URL + "&ids=" + idsList; - const result = await (await httpFetch(url)).json() as object; - return (flattenGetEntitiesResponse(result)).map((e) => new WikidataItemPhpWrap(e)); -} - -function flattenGetEntitiesResponse(responseJson: object): object[] { - let items: object[] = []; - for (const [key, value] of Object.entries(responseJson['entities'])) { - items.push((value) as object); - } - return items; -} - -function constructIdsList(cimIris: string[]): string { - return cimIris.map(getLastPartOfIri).join('|'); -} - -function getLastPartOfIri(iri: string): string { - return iri.split("/").pop(); -} -