Skip to content

Commit

Permalink
Merge pull request #360 from clarin-eric/formats
Browse files Browse the repository at this point in the history
a quick update of the master
  • Loading branch information
bansp authored Dec 19, 2024
2 parents 4b74900 + 0bb957c commit c8ed77c
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 28 deletions.
34 changes: 17 additions & 17 deletions SIS/clarin/data/domains.xml
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
<domains xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="../schemas/domains.xsd">
<!-- sync the state of this taxonomy with the schema for recommendations, i.e. recommendation.xsd -->
<domain id="1" orderBy="Source Data">
<domain id="1" orderBy="Source Language Data">
<name>Audiovisual Source Language Data</name>
<desc>Audio or video recordings providing spoken/multimodal or signed language data for
research purposes.</desc>
</domain>
<domain id="2" orderBy="Source Data">
<domain id="2" orderBy="Source Language Data">
<name>Image Source Language Data</name>
<desc>Digitized images of analogue sources of written language data for research purposes
(e.g. facsimiles, scans of handwriting, photos of inscriptions).</desc>
</domain>
<domain id="3" orderBy="Source Data">
<domain id="3" orderBy="Source Language Data">
<name>Textual Source Language Data</name>
<desc>Written unstructured/plain text or originally structured text (e.g. HTML) without linguistic or other mark-up added for research purposes.</desc>
<desc>Written unstructured/plain text or originally structured text (e.g. HTML), without linguistic or other mark-up added for research purposes.</desc>
</domain>
<domain id="4" orderBy="Source Data">
<domain id="4">
<name>Contextual Data</name>
<desc>Images (photos or drawings) or documents relevant to the communicative event or text but not part of the source language data.</desc>
<desc>Images (photos or drawings) or documents relevant to the communicative event or text, but not part of the source language data.</desc>
</domain>
<domain id="5" orderBy="Annotation">
<name>Text Annotation</name>
<desc>Annotations of textual sources/written text, with the original text included or as stand-off.</desc>
</domain>
<domain id="6" orderBy="Annotation">
<name>Audiovisual Annotation</name>
<desc>Annotations of audiovisual sources, usually including a basic rendering of the spoken content (transcription) and sometimes further annotation.</desc>
<desc>Annotations of audiovisual sources, usually including a basic rendering of the spoken content (transcription) and, sometimes, further annotation.</desc>
</domain>
<domain id="7" orderBy="Annotation">
<name>Image Annotation</name>
Expand All @@ -47,31 +47,31 @@
<name>Documentation</name>
<desc>Unstructured documentation of the resource and its parts such as corpus or annotation guidelines.</desc>
</domain>
<domain id="12" orderBy="Databases">
<domain id="12">
<name>Lexical Resource</name>
<desc>Structured (item-based) resources for lexical and/or conceptual information on units of language (e.g. wordlists, lexicons, WordNets etc.)</desc>
<desc>Structured (item-based) resources for lexical and/or conceptual information on units of language (e.g., wordlists, lexicons, WordNets, etc.).</desc>
</domain>
<domain id="13" orderBy="Uncategorized">
<domain id="13">
<name>Tool Support</name>
<desc>Tool-related formats required for specific functionality of the tool or reliable reuse of resources (e.g. tagsets, annotation schemes, vocabularies, language models, parameter files, and other specifications or settings)</desc>
<desc>Tool-related formats required for specific functionality of the tool or reliable reuse of resources (e.g., tagsets, annotation schemes, vocabularies, language models, parameter files, and other specifications or settings).</desc>
</domain>
<domain id="14" orderBy="Databases">
<domain id="14">
<name>Geodata</name>
<desc>Information on geographic locations.</desc>
</domain>
<domain id="15" orderBy="Databases">
<domain id="15">
<name>Statistical Data</name>
<desc>Data from surveys and tests in numeric formats.</desc>
</domain>
<domain id="16" orderBy="Databases">
<domain id="16">
<name>Language Description</name>
<desc>Structured or unstructured descriptions of linguistic varieties or phenomena, typological databases etc.</desc>
<desc>Structured or unstructured descriptions of linguistic varieties or phenomena, typological databases, etc.</desc>
</domain>
<domain id="17" orderBy="Uncategorized">
<domain id="17">
<name>Packaging</name>
<desc>Packaging formats of various nature (archiving, compression, library) if no more specific domain is suitable.</desc>
</domain>
<domain id="18" orderBy="Uncategorized">
<domain id="18">
<name>Other</name>
<desc>Any other function that cannot be included in an existing domain. The content of this domain will be periodically examined for potential patterns that may give rise to new domains.</desc>
</domain>
Expand Down
19 changes: 17 additions & 2 deletions SIS/clarin/model/domain.xqm
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
xquery version "3.0";
xquery version "3.1";

module namespace domain="http://clarin.ids-mannheim.de/standards/domain";

Expand Down Expand Up @@ -31,7 +31,22 @@ declare function domain:get-metadomain($nameOrId as xs:string){
};

(: return a sequence of full domain nodes by passing the name of a metadomain, e.g. 'Annotation' :)
declare function domain:get-domains-by-metadomain($name as xs:string){
declare function domain:get-domains-by-metadomain($name as xs:string) as element(domain)+ {
    (: Select the domain elements that belong to the metadomain $name.
       The pseudo-metadomain 'Uncategorized' selects the domains that carry no
       @orderBy attribute at all; an @orderBy holding an empty string is
       deliberately NOT treated as uncategorized.
       The declared return type demands at least one match, so a name that
       selects nothing raises a type error. :)
    if ($name ne 'Uncategorized')
    then $domain:domains[@orderBy eq $name]
    else $domain:domains[empty(@orderBy)]
};

(: return a sequence of metadomain names together with "Uncategorized" :)
declare function domain:get-all-metadomains() as xs:string+ {
    (: Return every distinct metadomain name found in @orderBy, plus the
       pseudo-metadomain "Uncategorized".
       "Uncategorized" is added INSIDE distinct-values so that it is listed only
       once even if some domain still carries an explicit
       orderBy="Uncategorized" value.
       Empty-string @orderBy values are intentionally not filtered out here.
       The order of the result is implementation-dependent; callers that need a
       stable order must sort it themselves. :)
    distinct-values(($domain:domains/@orderBy/string(), "Uncategorized"))
};





21 changes: 13 additions & 8 deletions SIS/clarin/modules/domain.xql
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,17 @@ declare function dm:get-id-by-name($name as xs:string){
};

(: get the name of the metadomain by passing a domain name or a domain ID :)
declare function dm:get-metadomain($nameOrId as xs:string){
domain:get-metadomain($nameOrId)
declare function dm:get-metadomain($nameOrId as xs:string) as xs:string {
    (: Resolve the metadomain name for a domain given by name or by ID.
       Falls back to "Uncategorized" when the lookup yields nothing usable.

       $nameOrId : a domain name or a domain ID
       returns   : the metadomain name, or "Uncategorized"

       NOTE(review): domain:get-metadomain is defined in another module; it
       presumably yields the empty sequence when the domain has no @orderBy -
       confirm. A value comparison (eq) against an empty sequence evaluates to
       the empty sequence, which the conditional treats as false, so the
       empty result would be returned as-is and break the declared xs:string
       return type. Hence the explicit empty() test; the general comparison
       (=) covers the empty-string case. :)
    let $metadomain := domain:get-metadomain($nameOrId)
    return
        if (empty($metadomain) or $metadomain = '')
        then "Uncategorized"
        else $metadomain
};

(: return a sequence of full domain nodes by passing the name of a metadomain, e.g. 'Annotation' :)
declare function dm:get-domains-by-metadomain($name as xs:string){
declare function dm:get-domains-by-metadomain($name as xs:string) as element(domain)+ {
    (: Facade over the model layer: hands $name to
       domain:get-domains-by-metadomain and passes its result through
       unchanged. The declared return type requires at least one domain. :)
    let $matching-domains := domain:get-domains-by-metadomain($name)
    return $matching-domains
};

Expand All @@ -38,8 +43,8 @@ declare function dm:get-domain-names-by-metadomain($name as xs:string){


(: Generate the list of domains for the particular group :)
declare function dm:list-domains($group as xs:string) {
for $domain in $domain:domains[@orderBy eq $group]
declare function dm:list-domains($group as xs:string) as element(li)+ {
for $domain in domain:get-domains-by-metadomain($group)
let $domain-id := $domain/@id
let $domain-name := $domain/name/text()
let $domain-snippet := $domain/desc
Expand All @@ -60,7 +65,7 @@ declare function dm:list-domains($group as xs:string) {

(: iterate across the groups of domains :)
declare function dm:list-domains-grouped() {
for $group in distinct-values($domain:domains/@orderBy)
for $group in domain:get-all-metadomains()
order by $group
return
<li>
Expand All @@ -72,10 +77,10 @@ declare function dm:list-domains-grouped() {
};


declare function dm:create-domain-group-recommendation-link($group as xs:string){
declare function dm:create-domain-group-recommendation-link($group as xs:string) as xs:string {

let $domain-ids :=
for $id in $domain:domains[@orderBy eq $group]/@id
for $id in domain:get-domains-by-metadomain($group)/@id
return concat("domain=",$id)
let $joined-domains := fn:string-join($domain-ids,"&amp;")

Expand Down
2 changes: 1 addition & 1 deletion SIS/clarin/schemas/domains.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
<xs:documentation>This is an internal handle, used at some places for easier processing.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="orderBy" use="required">
<xs:attribute name="orderBy" use="optional">
<xs:annotation><xs:documentation>This attribute identifies the metadomain, such as "Annotation" for various specific kinds of annotation. It is optional: domains that omit it are listed under the "Uncategorized" metadomain.</xs:documentation></xs:annotation>
</xs:attribute>
</xs:complexType>
Expand Down

0 comments on commit c8ed77c

Please sign in to comment.