-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathgfvo_examples.ttl
343 lines (283 loc) · 15.1 KB
/
gfvo_examples.ttl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
@prefix : <http://www.biointerchange.org/gfvo#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix faldo: <http://biohackathon.org/resource/faldo#> .
@prefix so: <http://purl.bioontology.org/ontology/SO/SO> .
#################################################################
#
# Example 1
#
# Encoding loci and basic genomic feature annotations. The
# example covers lines 0-3 of the example gene description
# given in the GFF3 specification.
# (http://www.sequenceontology.org/resources/gff3.html)
#
# Please note that this seemingly verbose data representation
# is slightly misleading. The reuse of non-literals as well as
# binary RDF encodings such as RDF/HDT permits a more concise
# representation of the same data.
#
# This example also avoids blank nodes, so that this file can
# be conveniently viewed using the Protege ontology editor.
# Using blank nodes, the line
#
# :hasIdentifier :ExampleSet1Feature1Identifier ;
#
# can be substituted with
#
# :hasIdentifier [
# a :Identifier ;
# :hasValue "gene00001" ;
# ]
#
#################################################################
# The data below mirrors the contents of a GFF3 file, version 3.
:Example1Version
    rdf:type :Version ;
    :hasValue "gff-version 3" .

# The file holds two genomic features. The rdfs:comment was added to
# record the provenance of this example representation.
:ExampleSet1
    rdf:type :File ;
    :hasIdentifier :Example1Version ;
    :hasMember :ExampleSet1Feature1 , :ExampleSet1Feature2 ;
    rdfs:comment "A simple example of a hierarchical genomic feature dependency. This example is an excerpt of another example given in the GFF3 specification (http://sequenceontology.org/resources/gff3.html)."@en .
# Features in this example are placed on a single landmark, which has
# an identifier associated with it (it cannot be anonymous in GFF3, even
# though GFVO permits landmarks without explicit identifier), and it
# has a range of sequence positions it covers (expressed by a separate
# pragma statement in GFF3).
:ExampleSet1Landmark
    rdf:type :Landmark ;
    :hasIdentifier :ExampleSet1LandmarkIdentifier ;
    :hasAttribute :ExampleSet1LandmarkRegion .

# Identifier attached to the landmark.
:ExampleSet1LandmarkIdentifier
    rdf:type :Identifier ;
    :hasValue "ctg123" .

# Range of sequence positions attached to the landmark (1 to 1497228).
:ExampleSet1LandmarkRegion
    rdf:type faldo:Region ;
    faldo:begin :ExampleSet1LandmarkStartPosition ;
    faldo:end :ExampleSet1LandmarkEndPosition .

:ExampleSet1LandmarkStartPosition
    rdf:type faldo:ExactPosition ;
    faldo:position 1 .

:ExampleSet1LandmarkEndPosition
    rdf:type faldo:ExactPosition ;
    faldo:position 1497228 .
# GFF3, GTF and GVF make use of named attributes. There are predefined (reserved)
# names, but users can also freely contribute their own attributes. In this example,
# only one labeled attribute is used, whose name we define upfront.
#
# Note: It might be a bit confusing that the label "Name" is applied to an attribute
# of type "Name". This is just an artifact of the chosen example and it is of
# course possible to label a "Name" attribute arbitrarily.
# Label entity holding the attribute name "Name".
:ExampleSet1AttributeName
    rdf:type :Label ;
    :hasValue "Name" .

# Line 2 of the GFF3 file, the first feature: a gene.
:ExampleSet1Feature1
    rdf:type :Feature , so:0000704 ;
    :isLocatedOn :ExampleSet1Landmark ;
    :hasPart :ExampleSet1Feature1Locus ;
    :hasIdentifier :ExampleSet1Feature1Identifier ;
    :hasAttribute :ExampleSet1Feature1Name .

:ExampleSet1Feature1Locus
    rdf:type :Locus ;
    :hasAttribute :ExampleSet1Feature1Region .

:ExampleSet1Feature1Region
    rdf:type faldo:Region ;
    faldo:begin :ExampleSet1FeatureStartPosition ;
    faldo:end :ExampleSet1Feature1EndPosition .

# Both features of this example start at the same coordinate, so this one
# position entity serves :ExampleSet1Feature1 as well as :ExampleSet1Feature2.
:ExampleSet1FeatureStartPosition
    rdf:type faldo:ExactPosition ;
    faldo:position 1000 .

:ExampleSet1Feature1EndPosition
    rdf:type faldo:ExactPosition ;
    faldo:position 9000 .

:ExampleSet1Feature1Identifier
    rdf:type :Identifier ;
    :hasValue "gene00001" .

# The name "EDEN", labelled via the shared attribute-name entity.
:ExampleSet1Feature1Name
    rdf:type :Name ;
    :hasAttribute :ExampleSet1AttributeName ;
    :hasValue "EDEN" .
# Second feature of the GFF3 file (line 3): a transcription factor
# binding site.
# It references the first feature via a "Parent" attribute.
:ExampleSet1Feature2 a :Feature ,
        so:0000235 ;
    :isLocatedOn :ExampleSet1Landmark ;
    :hasPart :ExampleSet1Feature2Locus ;
    :hasIdentifier :ExampleSet1Feature2Identifier ;
    :hasSource :ExampleSet1Feature1 .

# The locus of the second feature must point at the second feature's own
# region (1000-1012), not at the region of the first feature (1000-9000).
:ExampleSet1Feature2Locus a :Locus ;
    :hasAttribute :ExampleSet1Feature2Region .

# The begin position is the start position entity that is also used by
# :ExampleSet1Feature1Region; only the end position differs.
:ExampleSet1Feature2Region a faldo:Region ;
    faldo:begin :ExampleSet1FeatureStartPosition ;
    faldo:end :ExampleSet1Feature2EndPosition .

:ExampleSet1Feature2EndPosition a faldo:ExactPosition ;
    faldo:position 1012 .

:ExampleSet1Feature2Identifier a :Identifier ;
    :hasValue "tfbs00001" .
#################################################################
#
# Example 2
#
# Encoding of a sequence alignment. This is part of the GFF3
# specification, denoting the alignment between the reference
# sequence "chr3" and the target sequence "EST23".
# (http://www.sequenceontology.org/resources/gff3.html)
#
#################################################################
# Collection grouping the two features of the alignment example.
:ExampleSet2
    rdf:type :Collection ;
    :hasMember :ExampleSet2Feature1 , :ExampleSet2Feature2 ;
    rdfs:comment "An example of a sequence alignment. This example is an excerpt of another example given in the GFF3 specification (http://sequenceontology.org/resources/gff3.html)."@en .

# Landmark on which both features of this example are located.
:ExampleSet2Landmark
    rdf:type :Landmark ;
    :hasIdentifier :ExampleSet2LandmarkIdentifier .

:ExampleSet2LandmarkIdentifier
    rdf:type :Identifier ;
    :hasValue "chr3" .
# Reference sequence feature:
:ExampleSet2Feature1 a :Feature ;
    :isLocatedOn :ExampleSet2Landmark ;
    :hasIdentifier :ExampleSet2Feature1Identifier .

# Identifier entity of the reference sequence feature. Its subject must be
# the "ExampleSet2" identifier that the feature refers to; declaring it under
# an "ExampleSet1" name would leave the referenced entity undefined and add
# a second value to an identifier of Example 1.
:ExampleSet2Feature1Identifier a :Identifier ;
    :hasValue "Match1" .

# Target sequence feature:
:ExampleSet2Feature2 a :Feature ,
        so:0000343 ;
    :isLocatedOn :ExampleSet2Landmark ;
    :hasIdentifier :ExampleSet2Feature2Identifier .

# Identifier entity of the target sequence feature (same naming rule as
# for the reference sequence feature's identifier).
:ExampleSet2Feature2Identifier a :Identifier ;
    :hasValue "EST23" .
# Description of the sequence alignment between :ExampleSet2Feature1
# and :ExampleSet2Feature2 (features "Match1" and "EST23").
# The alignment links both features and lists its five operations:
# one first part, three ordered parts in between, and one last part.
:ExampleSet2SequenceAlignment
    rdf:type :SequenceAlignment ;
    :hasAttribute :ExampleSet2AlignmentLocus ;
    :hasSource :ExampleSet2Feature1 ;
    :hasInput :ExampleSet2Feature2 ;
    :hasFirstPart :ExampleSet2AlignmentOperation1 ;
    :hasOrderedPart
        :ExampleSet2AlignmentOperation2 ,
        :ExampleSet2AlignmentOperation3 ,
        :ExampleSet2AlignmentOperation4 ;
    :hasLastPart :ExampleSet2AlignmentOperation5 .

# Locus giving the region over which the alignment operation is
# carried out.
:ExampleSet2AlignmentLocus
    rdf:type :Locus ;
    :hasAttribute :ExampleSet2AlignmentRegion .

:ExampleSet2AlignmentRegion
    rdf:type faldo:Region ;
    faldo:begin :ExampleSet2AlignmentStartPosition ;
    faldo:end :ExampleSet2AlignmentEndPosition .

:ExampleSet2AlignmentStartPosition
    rdf:type faldo:ExactPosition ;
    faldo:position 1 .

:ExampleSet2AlignmentEndPosition
    rdf:type faldo:ExactPosition ;
    faldo:position 21 .
# The individual alignment operations. Each operation carries a span
# attribute and is chained to its neighbours via isBefore/isAfter.

# Operation 1: match, span 8.
:ExampleSet2AlignmentOperation1
    a :Match ;
    :hasAttribute :ExampleSet2AlignmentOperation1Span ;
    :isBefore :ExampleSet2AlignmentOperation2 .

:ExampleSet2AlignmentOperation1Span
    a :Span ;
    :hasValue 8 .

# Operation 2: target sequence gap, span 3.
:ExampleSet2AlignmentOperation2
    a :TargetSequenceGap ;
    :hasAttribute :ExampleSet2AlignmentOperation2Span ;
    :isAfter :ExampleSet2AlignmentOperation1 ;
    :isBefore :ExampleSet2AlignmentOperation3 .

:ExampleSet2AlignmentOperation2Span
    a :Span ;
    :hasValue 3 .

# Operation 3: match, span 6.
:ExampleSet2AlignmentOperation3
    a :Match ;
    :hasAttribute :ExampleSet2AlignmentOperation3Span ;
    :isAfter :ExampleSet2AlignmentOperation2 ;
    :isBefore :ExampleSet2AlignmentOperation4 .

:ExampleSet2AlignmentOperation3Span
    a :Span ;
    :hasValue 6 .

# Operation 4: reference sequence gap, span 1.
:ExampleSet2AlignmentOperation4
    a :ReferenceSequenceGap ;
    :hasAttribute :ExampleSet2AlignmentOperation4Span ;
    :isAfter :ExampleSet2AlignmentOperation3 ;
    :isBefore :ExampleSet2AlignmentOperation5 .

:ExampleSet2AlignmentOperation4Span
    a :Span ;
    :hasValue 1 .

# Operation 5: match, span 6.
:ExampleSet2AlignmentOperation5
    a :Match ;
    :hasAttribute :ExampleSet2AlignmentOperation5Span ;
    :isAfter :ExampleSet2AlignmentOperation4 .

:ExampleSet2AlignmentOperation5Span
    a :Span ;
    :hasValue 6 .
#################################################################
#
# Example 3
#
# Encoding of genotypes/sequence variants. This is part of the GVF
# specification, denoting two variants for two separate
# sequenced individuals: "Variant_seq=A,T;Genotype=0:1,1:1".
# (See "Genotype" in the GVF specification at
# http://www.sequenceontology.org/resources/gvf.html)
#
#################################################################
# Collection with a single member feature.
:ExampleSet3
    rdf:type :Collection ;
    :hasMember :ExampleSet3Feature1 ;
    rdfs:comment "Encoding of genotypes/sequence variants. This example is part of the GVF specification, denoting two variants for two separate sequenced individuals (http://www.sequenceontology.org/resources/gvf.html)."@en .

# The sequenced individuals are named instances without further detail;
# this is enough to tell the individuals apart.
:ExampleSet3SequencedIndividual1 a :SequencedIndividual .
:ExampleSet3SequencedIndividual2 a :SequencedIndividual .

# The genomic feature to which both described genotypes are attached.
:ExampleSet3Feature1
    rdf:type :Feature ;
    :hasAttribute :ExampleSet3Genotype1 , :ExampleSet3Genotype2 .
# Genotype for the first individual.
#
# The genotype's phased variants are linked via "hasFirstPart" and
# "hasLastPart" here, which are descendants of "hasOrderedPart".
# The object properties "isBefore"/"isAfter" connect the phased variants.
#
# Note: for an unphased genotype, it is sufficient to use "hasPart" only.
:ExampleSet3Genotype1
    rdf:type :Genotype ;
    :hasParticipant :ExampleSet3SequencedIndividual1 ;
    :hasQuality :Heterozygous ;
    :hasAttribute :ExampleSet3Phase ;
    :hasFirstPart :ExampleSet3ReferenceA_AT ;
    :hasLastPart :ExampleSet3VariantT_AT .

# Phased genotypes are expressed by ordering the variants.
# Using "isBefore" is recommended, while adding "isAfter" is optional.
:ExampleSet3ReferenceA_AT
    :isBefore :ExampleSet3VariantT_AT .

:ExampleSet3VariantT_AT
    :isAfter :ExampleSet3ReferenceA_AT .

# Genotype of the second individual.
:ExampleSet3Genotype2
    rdf:type :Genotype ;
    :hasParticipant :ExampleSet3SequencedIndividual2 ;
    :hasQuality :Homozygous ;
    :hasAttribute :ExampleSet3Phase ;
    :hasFirstPart :ExampleSet3VariantT_TT ;
    :hasLastPart :ExampleSet3VariantT_TT .

# Same approach as for the first genotype: the variant ordering phases the
# genotype. First and last part are the same entity here, so the ordering
# statements refer back to that entity itself.
:ExampleSet3VariantT_TT
    :isBefore :ExampleSet3VariantT_TT ;
    :isAfter :ExampleSet3VariantT_TT .
# Having an instance to denote the presence of phase information
# might appear redundant across data sets. On the other hand, it
# permits data providers to enrich their own or other providers'
# data set with additional information (annotations) about used
# protocols/equipment/tools.
:ExampleSet3Phase rdf:type :GameticPhase .

# With VCF data the type may also be "ReferenceSequence", meaning that the
# sequence is located on a reference genome. A VCF file indicates this with
# a "0" in the GT field: for instance "0|1" (phased) or "0/1" (unphased)
# describes a genotype whose first allele is on the reference genome and
# whose second allele is an alternative allele.
:ExampleSet3ReferenceA_AT
    rdf:type :ReferenceSequence ;
    :hasValue "A" .

:ExampleSet3VariantT_AT
    rdf:type :SequenceVariant ;
    :hasValue "T" .

:ExampleSet3VariantT_TT
    rdf:type :SequenceVariant ;
    :hasValue "T" .
#################################################################
#
# Example 4
#
# Encoding of a Phred score. GVF leaves it open how the
# "score" (column 6) should be interpreted, but recommends
# the use of Phred scores. If a data provider knows that
# a Phred score was utilized, then this can be represented
# via GFVO.
# (See "Column Descriptions" in the GVF specification at
# http://www.sequenceontology.org/resources/gvf.html)
#
#################################################################
# Collection with a single member feature.
:ExampleSet4
    rdf:type :Collection ;
    :hasMember :ExampleSet4Feature1 ;
    rdfs:comment "Encoding of a Phred score. This example is motivated by the GVF specification, which encourages -- but does not enforce -- the use of Phred scores (http://www.sequenceontology.org/resources/gvf.html)."@en .

# The "score" (column 6) of a GVF file, interpreted as a Phred score.
:ExampleSet4Feature1
    rdf:type :Feature ;
    :hasAttribute :ExampleSet4Feature1Score .

# "PhredScore" is used in place of "Score" to state the specific type
# of score in use.
:ExampleSet4Feature1Score
    rdf:type :PhredScore ;
    :hasValue 38 .