-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmodule.annotation.r
62 lines (53 loc) · 2.65 KB
/
module.annotation.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Remove TCGA barcodes that are flagged in the annotation table.
# Input:  barcode   - character vector of TCGA barcodes to filter
#         annot.tbl - annotation table with (at least) columns "item" and "categoryId"
#         keep      - character vector of categoryId values that are tolerated: an
#                     annotation whose categoryId is in "keep" does NOT cause removal.
#                     To remove every annotated barcode, pass a value that matches no
#                     categoryId, e.g. "none". Default is c("6", "204") (prior and
#                     synchronous malignancy, per the original author's note).
#         recursive - if TRUE (default), a query barcode is removed when it contains
#                     an annotated item (so barcodes derived from an annotated one are
#                     removed too); if FALSE, only exact matches are removed.
# Output: the input barcode vector with the flagged entries dropped
FilterByAnnotation <- function(barcode, annot.tbl, keep = c("6", "204"), recursive = TRUE) {
  # annotated items whose categoryId is not in the tolerated set "keep"
  barcodeToRemove <- unique(annot.tbl$item[!annot.tbl$categoryId %in% keep])

  if (recursive) {
    # NA or empty items would turn into the patterns "NA" / "" and match
    # unrelated (or all) barcodes, so they are excluded from the regex
    patterns <- as.character(barcodeToRemove)
    patterns <- patterns[!is.na(patterns) & nzchar(patterns)]
    if (length(patterns) == 0) {
      # nothing to remove; an empty alternation "()" would match every barcode
      return(barcode)
    }
    # one alternation regex: a barcode is flagged if it contains any item
    regexToRemove <- paste0("(", paste(patterns, collapse = "|"), ")")
    flagged <- grepl(regexToRemove, barcode)
  } else {
    # only flag barcodes that match an annotated item exactly
    flagged <- barcode %in% barcodeToRemove
  }

  # logical subsetting is safe even when nothing is flagged
  barcode[!flagged]
}
# Get the TCGA annotation JSON list for the input "disease".
# Input:  disease - disease code appended verbatim to the query URL
# Output: the "dccAnnotation" element of the parsed JSON response
# Requires the RCurl and RJSONIO packages; performs a network request.
GetAnnotationJSON <- function(disease) {
  # load packages; library() stops with a clear error when a package is
  # missing, whereas require() only warns and lets the function fail later
  library(RCurl)
  library(RJSONIO)

  # build the query URL and report progress on stdout
  url <- paste0("https://tcga-data.nci.nih.gov/annotations/resources/searchannotations/json?disease=", disease)
  cat(paste("querying TCGA annotation database of disease", disease, "... ...\n"))

  # download the annotation document and parse it to a list; the calls are
  # namespace-qualified so another attached package exporting fromJSON()
  # cannot shadow the intended parser
  annot.file <- RCurl::getURL(url)
  RJSONIO::fromJSON(annot.file)$dccAnnotation
}
# Flatten an annotation JSON list into an annotation table.
# Input:  annot.json - list of annotation records; each record is read through
#                      $id, $items[[1]]$item and $annotationCategory$categoryId
#         outFile    - optional file name; when not NA, the table is also saved
#                      to that file (as object "annot.tbl") with save()
# Output: data frame with 3 character columns "id", "item" and "categoryId"
GetAnnotationTable <- function(annot.json, outFile = NA) {
  # apply one accessor to every record and coerce the result to character
  extractField <- function(accessor) {
    as.character(sapply(annot.json, accessor))
  }

  id <- extractField(function(rec) rec$id)
  # only the first entry of each record's "items" is used
  item <- extractField(function(rec) rec$items[[1]]$item)
  categoryId <- extractField(function(rec) rec$annotationCategory$categoryId)

  annot.tbl <- cbind.data.frame(id, item, categoryId, stringsAsFactors = FALSE)

  # no output file requested: just hand back the table
  if (is.na(outFile)) {
    return(annot.tbl)
  }

  # persist the table, then return it
  save(annot.tbl, file = outFile)
  annot.tbl
}
# Look up the annotation categories that apply to each query barcode.
# Input:  barcode   - vector of TCGA barcodes
#         annot.tbl - annotation table with columns "item" and "categoryId"
# Output: character vector named by barcode; each value is the sorted, unique
#         categoryId values of every annotated item found (as a regex match)
#         inside that barcode, joined with "|", or "" when nothing matches
QueryBarcodeAnnotation <- function(barcode, annot.tbl) {
  items <- annot.tbl$item
  categories <- annot.tbl$categoryId

  # collect the categories of all annotated items contained in one barcode
  collapseCategories <- function(oneBarcode) {
    # vapply() keeps the result logical even when the table has no rows,
    # where sapply() would return list() and break which()
    hits <- vapply(items, function(pattern) grepl(pattern, oneBarcode),
                   logical(1), USE.NAMES = FALSE)
    paste(sort(unique(categories[which(hits)])), collapse = "|")
  }

  # vapply() guarantees a character vector, including for empty input
  annot.lst <- vapply(barcode, collapseCategories, character(1), USE.NAMES = FALSE)
  names(annot.lst) <- barcode
  annot.lst
}