-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathGscTools.R
130 lines (111 loc) · 4.32 KB
/
GscTools.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
library("Haplin")
library("arm")
library("randomForest")
#' Drop rows that are mostly missing or mostly zero
#'
#' A row is removed when its number of `NA` entries, or its number of zero
#' entries, reaches `missratio * ncol(data)`.
#'
#' @param data A matrix or data frame.
#' @param missratio Fraction of the columns that must be `NA` (or zero) for a
#'   row to be dropped. Defaults to 0.5.
#' @return `data` without the flagged rows. The result stays two-dimensional
#'   even when only one row (or none) is left.
RawZeroRemove <- function(data, missratio = 0.5) {
  threshold <- missratio * ncol(data)
  na_count <- rowSums(is.na(data))
  # na.rm = TRUE: zeros must still be counted in rows that also contain NAs;
  # without it the count is NA and such rows can never reach the threshold.
  zero_count <- rowSums(data == 0, na.rm = TRUE)
  flagged <- na_count >= threshold | zero_count >= threshold
  # drop = FALSE keeps a single surviving row from collapsing to a vector.
  data[!flagged, , drop = FALSE]
}
#' Query the GDC files endpoint for the records listed in a manifest
#'
#' Reads the manifest, builds a JSON filter from its `id` column, writes that
#' payload to `Payload.txt`, and POSTs it with `curl` to
#' `https://api.gdc.cancer.gov/files`. The response is redirected to
#' `barcode.txt`. Both files are created in the current working directory.
#'
#' @param manifest Path to a manifest readable by `read.table()` with a header
#'   row and an `id` column.
#' @return The exit status of the `curl` command, invisibly. A warning is
#'   raised when that status is non-zero.
manifest2barcode <- function(manifest) {
  entries <- read.table(manifest, header = TRUE)
  # Without an id column the filter list would be empty and the request would
  # go out anyway, so fail before touching the network.
  if (is.null(entries$id)) {
    stop("manifest has no 'id' column: ", manifest, call. = FALSE)
  }
  quoted_ids <- toString(sprintf('"%s"', entries$id))
  filter_head <- '{"filters":{"op":"in","content":{"field":"files.file_id","value":[ '
  field_tail <- '] }},"format":"TSV","fields":"file_id,file_name,cases.submitter_id,cases.case_id,data_category,data_type,cases.samples.tumor_descriptor,cases.samples.tissue_type,cases.samples.sample_type,cases.samples.submitter_id,cases.samples.sample_id,cases.samples.portions.analytes.aliquots.aliquot_id,cases.samples.portions.analytes.aliquots.submitter_id","size":'
  # The result size is set to the number of manifest rows.
  size_tail <- paste0("\"", nrow(entries), "\"", "}")
  payload <- paste(filter_head, quoted_ids, field_tail, size_tail)
  writeLines(payload, "Payload.txt")
  status <- system("curl --request POST --header \"Content-Type: application/json\" --data @Payload.txt \"https://api.gdc.cancer.gov/files\" > barcode.txt")
  if (!identical(as.integer(status), 0L)) {
    warning("curl exited with status ", status,
            "; barcode.txt may be empty or incomplete", call. = FALSE)
  }
  invisible(status)
}
#' Look up symbols for ENST identifiers
#'
#' Downloads the ENSG/ENST/ENSP/Symbol annotation table on every call and
#' matches `ENST` against its `V7` column.
#'
#' @param ENST Vector of identifiers to look up.
#' @return The fourth-column value of each matched row; `NA` where there is no
#'   match.
ENST2Symbol <- function(ENST) {
  annotation <- read.table(
    "https://raw.githubusercontent.com/Shicheng-Guo/AnnotationDatabase/master/ENSG.ENST.ENSP.Symbol.hg19.bed",
    sep = "\t"
  )
  hit_rows <- match(ENST, annotation$V7)
  annotation[hit_rows, 4]
}
#' Look up symbols for ENSG identifiers, ignoring version suffixes
#'
#' Downloads the ENSG/ENST/ENSP/Symbol annotation table on every call. Each
#' identifier is cut at its first "." (so "ENSG00000000003.10" becomes
#' "ENSG00000000003") and then matched against the table's `V8` column.
#'
#' @param ENSG Character vector or factor of identifiers, with or without a
#'   ".version" suffix.
#' @return The fourth-column value of each matched row; `NA` where there is no
#'   match.
ENSG2Symbol <- function(ENSG) {
  annotation <- read.table(
    "https://raw.githubusercontent.com/Shicheng-Guo/AnnotationDatabase/master/ENSG.ENST.ENSP.Symbol.hg19.bed",
    sep = "\t"
  )
  # as.character() lets factor input through; sub() strips the suffix in one
  # vectorised step. Only the suffix-stripped lookup is performed, because a
  # lookup on the raw identifiers would be overwritten by it anyway.
  gene_ids <- sub("[.].*$", "", as.character(ENSG))
  annotation[match(gene_ids, annotation$V8), 4]
}
#' Look up symbols for ENSP identifiers
#'
#' Downloads the ENSG/ENST/ENSP/Symbol annotation table on every call and
#' matches `ENSP` against its `V9` column.
#'
#' @param ENSP Vector of identifiers to look up.
#' @return The fourth-column value of each matched row; `NA` where there is no
#'   match.
ENSP2Symbol <- function(ENSP) {
  table_url <- "https://raw.githubusercontent.com/Shicheng-Guo/AnnotationDatabase/master/ENSG.ENST.ENSP.Symbol.hg19.bed"
  lookup <- read.table(table_url, sep = "\t")
  lookup[match(ENSP, lookup$V9), 4]
}
#' Extract the four-field TCGA identifier from file names
#'
#' Finds the first substring of the form `TCGA-<alnum>-<alnum>-<digits>` in
#' each element, e.g. "TCGA-AB-1234-01" from "TCGA-AB-1234-01A-11R.txt".
#' Uses base R only, so calling it does not attach any package.
#'
#' @param filename Character vector (or factor) of file names.
#' @return A one-dimensional character array with one entry per element of
#'   `filename`; `NA` where no identifier is found.
id2phen4 <- function(filename) {
  filename <- as.character(filename)
  # Inside a character class "|" is a literal pipe, not alternation, so the
  # class is written as plain alphanumerics to keep pipes out of identifiers.
  pattern <- "TCGA-[0-9a-zA-Z]*-[0-9a-zA-Z]*-[0-9]*"
  located <- regexpr(pattern, filename, perl = TRUE)
  found <- !is.na(located) & located > -1L
  barcode <- rep(NA_character_, length(filename))
  # regmatches() returns only the matched elements, in order.
  barcode[found] <- regmatches(filename, located)
  as.array(barcode)
}
#' Extract the three-field TCGA identifier from file names
#'
#' Finds the first substring of the form `TCGA-<alnum>-<alnum>` in each
#' element, e.g. "TCGA-AB-1234" from "TCGA-AB-1234-01A-11R.txt".
#' Uses base R only, so calling it does not attach any package.
#'
#' @param filename Character vector (or factor) of file names.
#' @return A one-dimensional character array with one entry per element of
#'   `filename`; `NA` where no identifier is found.
id2phen3 <- function(filename) {
  filename <- as.character(filename)
  # Inside a character class "|" is a literal pipe, not alternation, so the
  # class is written as plain alphanumerics to keep pipes out of identifiers.
  pattern <- "TCGA-[0-9a-zA-Z]*-[0-9a-zA-Z]*"
  located <- regexpr(pattern, filename, perl = TRUE)
  found <- !is.na(located) & located > -1L
  barcode <- rep(NA_character_, length(filename))
  # regmatches() returns only the matched elements, in order.
  barcode[found] <- regmatches(filename, located)
  as.array(barcode)
}
#' Extract the numeric fourth field of the TCGA identifier in file names
#'
#' Locates the first `TCGA-<alnum>-<alnum>-<digits>` substring in each element
#' and returns its fourth dash-separated field as a number, e.g. 1 for
#' "TCGA-AB-1234-01A". Uses base R only, so calling it does not attach any
#' package.
#'
#' @param filename Character vector (or factor) of file names.
#' @return Numeric vector with one entry per element of `filename`; `NA` where
#'   no identifier is found or its fourth field is empty.
id2bin <- function(filename) {
  filename <- as.character(filename)
  # Inside a character class "|" is a literal pipe, not alternation, so the
  # class is written as plain alphanumerics to keep pipes out of identifiers.
  pattern <- "TCGA-[0-9a-zA-Z]*-[0-9a-zA-Z]*-[0-9]*"
  located <- regexpr(pattern, filename, perl = TRUE)
  found <- !is.na(located) & located > -1L
  barcode <- rep(NA_character_, length(filename))
  barcode[found] <- regmatches(filename, located)
  # A missing fourth piece (no match, or nothing after the third dash)
  # indexes past the end of the split and therefore yields NA.
  fourth <- vapply(
    strsplit(barcode, "-", fixed = TRUE),
    function(parts) parts[4],
    character(1),
    USE.NAMES = FALSE
  )
  as.numeric(fourth)
}
#' Extract the token that follows "edu_" in file names
#'
#' Finds the first occurrence of "edu_" followed by four characters and
#' returns the second underscore-separated piece of that match, e.g. "AB12"
#' from "site.edu_AB12_x.idat". Uses base R only, so calling it does not
#' attach any package.
#'
#' @param filename Character vector (or factor) of file names.
#' @return Character vector with one entry per element of `filename`; `NA`
#'   where the pattern is absent. Empty input gives `character(0)`.
id2pid <- function(filename) {
  filename <- as.character(filename)
  located <- regexpr("edu_....", filename, perl = TRUE)
  found <- !is.na(located) & located > -1L
  token <- rep(NA_character_, length(filename))
  # regmatches() returns only the matched elements, in order.
  token[found] <- regmatches(filename, located)
  # vapply keeps the result a character vector even for empty input.
  vapply(
    strsplit(token, "_", fixed = TRUE),
    function(parts) parts[2],
    character(1),
    USE.NAMES = FALSE
  )
}
#' Look up the fifth-column annotation for CpG identifiers
#'
#' Downloads the GPL13534 450K hg19 table on every call and matches `cpg`
#' against its fourth column.
#'
#' @param cpg Vector of identifiers to look up.
#' @return The fifth-column value of each matched row; `NA` where there is no
#'   match.
cpg2symbol <- function(cpg) {
  annotation <- read.table(
    "https://raw.githubusercontent.com/Shicheng-Guo/AnnotationDatabase/master/hg19/GPL13534_450K_hg19_V3.bed"
  )
  hit_rows <- match(cpg, annotation[, 4])
  annotation[hit_rows, 5]
}
#' Extract the four-field TCGA identifier from file names
#'
#' Finds the first substring of the form `TCGA-<alnum>-<alnum>-<digits>` in
#' each element, e.g. "TCGA-AB-1234-01" from "TCGA-AB-1234-01A-11R.txt".
#' Uses base R only, so calling it does not attach any package.
#'
#' NOTE: `id2phen4` is also defined earlier in this file; this later
#' definition is the one in effect once the whole file has been sourced.
#'
#' @param filename Character vector (or factor) of file names.
#' @return A one-dimensional character array with one entry per element of
#'   `filename`; `NA` where no identifier is found.
id2phen4 <- function(filename) {
  filename <- as.character(filename)
  # Inside a character class "|" is a literal pipe, not alternation, so the
  # class is written as plain alphanumerics to keep pipes out of identifiers.
  pattern <- "TCGA-[0-9a-zA-Z]*-[0-9a-zA-Z]*-[0-9]*"
  located <- regexpr(pattern, filename, perl = TRUE)
  found <- !is.na(located) & located > -1L
  barcode <- rep(NA_character_, length(filename))
  # regmatches() returns only the matched elements, in order.
  barcode[found] <- regmatches(filename, located)
  as.array(barcode)
}
#' Extract the three-field TCGA identifier from file names
#'
#' Finds the first substring of the form `TCGA-<alnum>-<alnum>` in each
#' element, e.g. "TCGA-AB-1234" from "TCGA-AB-1234-01A-11R.txt".
#' Uses base R only, so calling it does not attach any package.
#'
#' NOTE: `id2phen3` is also defined earlier in this file; this later
#' definition is the one in effect once the whole file has been sourced.
#'
#' @param filename Character vector (or factor) of file names.
#' @return A one-dimensional character array with one entry per element of
#'   `filename`; `NA` where no identifier is found.
id2phen3 <- function(filename) {
  filename <- as.character(filename)
  # Inside a character class "|" is a literal pipe, not alternation, so the
  # class is written as plain alphanumerics to keep pipes out of identifiers.
  pattern <- "TCGA-[0-9a-zA-Z]*-[0-9a-zA-Z]*"
  located <- regexpr(pattern, filename, perl = TRUE)
  found <- !is.na(located) & located > -1L
  barcode <- rep(NA_character_, length(filename))
  # regmatches() returns only the matched elements, in order.
  barcode[found] <- regmatches(filename, located)
  as.array(barcode)
}
#' Extract the numeric fourth field of the TCGA identifier in file names
#'
#' Locates the first `TCGA-<alnum>-<alnum>-<digits>` substring in each element
#' and returns its fourth dash-separated field as a number, e.g. 1 for
#' "TCGA-AB-1234-01A". Uses base R only, so calling it does not attach any
#' package.
#'
#' NOTE: `id2bin` is also defined earlier in this file; this later definition
#' is the one in effect once the whole file has been sourced.
#'
#' @param filename Character vector (or factor) of file names.
#' @return Numeric vector with one entry per element of `filename`; `NA` where
#'   no identifier is found or its fourth field is empty.
id2bin <- function(filename) {
  filename <- as.character(filename)
  # Inside a character class "|" is a literal pipe, not alternation, so the
  # class is written as plain alphanumerics to keep pipes out of identifiers.
  pattern <- "TCGA-[0-9a-zA-Z]*-[0-9a-zA-Z]*-[0-9]*"
  located <- regexpr(pattern, filename, perl = TRUE)
  found <- !is.na(located) & located > -1L
  barcode <- rep(NA_character_, length(filename))
  barcode[found] <- regmatches(filename, located)
  # A missing fourth piece (no match, or nothing after the third dash)
  # indexes past the end of the split and therefore yields NA.
  fourth <- vapply(
    strsplit(barcode, "-", fixed = TRUE),
    function(parts) parts[4],
    character(1),
    USE.NAMES = FALSE
  )
  as.numeric(fourth)
}
#' Extract the token that follows "edu_" in file names
#'
#' Finds the first occurrence of "edu_" followed by four characters and
#' returns the second underscore-separated piece of that match, e.g. "AB12"
#' from "site.edu_AB12_x.idat". Uses base R only, so calling it does not
#' attach any package.
#'
#' NOTE: `id2pid` is also defined earlier in this file; this later definition
#' is the one in effect once the whole file has been sourced.
#'
#' @param filename Character vector (or factor) of file names.
#' @return Character vector with one entry per element of `filename`; `NA`
#'   where the pattern is absent. Empty input gives `character(0)`.
id2pid <- function(filename) {
  filename <- as.character(filename)
  located <- regexpr("edu_....", filename, perl = TRUE)
  found <- !is.na(located) & located > -1L
  token <- rep(NA_character_, length(filename))
  # regmatches() returns only the matched elements, in order.
  token[found] <- regmatches(filename, located)
  # vapply keeps the result a character vector even for empty input.
  vapply(
    strsplit(token, "_", fixed = TRUE),
    function(parts) parts[2],
    character(1),
    USE.NAMES = FALSE
  )
}
#' Drop rows that are mostly missing or contain only zeros
#'
#' A row is removed when its number of `NA` entries reaches
#' `missratio * ncol(data)`, or when none of its non-missing entries differs
#' from zero.
#'
#' @param data A matrix or data frame.
#' @param missratio Fraction of the columns that must be `NA` for a row to be
#'   dropped. Defaults to 0.3.
#' @return `data` without the flagged rows. The result stays two-dimensional
#'   even when only one row (or none) is left.
RawNARemove <- function(data, missratio = 0.3) {
  threshold <- missratio * ncol(data)
  too_many_na <- rowSums(is.na(data)) >= threshold
  # Count the non-zero observed entries with NAs skipped: a zero row holding
  # a few NAs would otherwise compare to NA and never be recognised as zero.
  all_zero <- rowSums(data != 0, na.rm = TRUE) == 0
  # drop = FALSE keeps a single surviving row from collapsing to a vector.
  data[!(too_many_na | all_zero), , drop = FALSE]
}
#' Look up the fifth-column annotation for CpG identifiers
#'
#' Downloads the GPL13534 450K hg19 table on every call and matches `cpg`
#' against its fourth column.
#'
#' NOTE: `cpg2symbol` is also defined earlier in this file; this later
#' definition is the one in effect once the whole file has been sourced.
#'
#' @param cpg Vector of identifiers to look up.
#' @return The fifth-column value of each matched row; `NA` where there is no
#'   match.
cpg2symbol <- function(cpg) {
  table_url <- "https://raw.githubusercontent.com/Shicheng-Guo/AnnotationDatabase/master/hg19/GPL13534_450K_hg19_V3.bed"
  probe_table <- read.table(table_url)
  probe_table[match(cpg, probe_table[, 4]), 5]
}
#' Run t-SNE on a data set and draw the labelled embedding into a PDF
#'
#' Opens a PDF device at `plot`, runs `tsne::tsne()` on `mydata` with
#' perplexity 10, and draws the current embedding through the epoch callback,
#' with each row shown as its `phen` label in a per-label rainbow colour.
#'
#' @param mydata Numeric matrix or data frame; only this is embedded.
#' @param phen Vector of labels, one per row of `mydata`. Used for plot text
#'   and colour only; it is not part of the t-SNE input.
#' @param plot Path of the PDF file to create.
#' @return The value of `dev.off()` after the PDF device is closed.
tsneplot <- function(mydata, phen, plot = "tsne.plot.pdf") {
  stopifnot(length(phen) == NROW(mydata))

  # Colours are looked up by label text. Indexing a named vector with a
  # factor would use its integer codes, which need not follow the order of
  # unique().
  labels <- as.character(phen)
  groups <- unique(labels)
  group_colours <- rainbow(length(groups))
  names(group_colours) <- groups
  label_colours <- group_colours[labels]

  # `plot` is also the name of the file-path argument, so the graphics
  # functions are called with their namespace to keep the intent obvious.
  draw_epoch <- function(x, y) {
    graphics::plot(x, type = "n")
    graphics::text(x, labels = labels, col = label_colours)
  }

  grDevices::pdf(plot)
  # Close the device if tsne() fails; otherwise the PDF stays open and
  # later plots are silently written into it.
  device_open <- TRUE
  on.exit(if (device_open) grDevices::dev.off(), add = TRUE)

  # Only the measurements are embedded. Binding the labels into the input
  # would feed them to the distance computation (and fail for non-numeric
  # labels).
  tsne::tsne(mydata, epoch_callback = draw_epoch, perplexity = 10)

  device_open <- FALSE
  grDevices::dev.off()
}