-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathscIMMUNE_PROCESSING_1.R
745 lines (654 loc) · 41.8 KB
/
scIMMUNE_PROCESSING_1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
###########################################################################################################################
# SCIENCE FIGURES: scIMMUNE
###########################################################################################################################
# Remove P214 Spleen
setwd("~/Dropbox/KI/Studies/LXXLZ/Website/10X/IMMUNE_PROFILING/Data/SAMPLE_DATA/")
project_name <- "scImmune_All_Tissues"
input_dir <- "~/Dropbox/KI/Studies/LXXLZ/Website/10X/IMMUNE_PROFILING/Data/SAMPLE_DATA/"
phenodata_dir <- "~/Dropbox/KI/Studies/LXXLZ/Website/10X/IMMUNE_PROFILING/Data/SAMPLE_DATA/RAW_TCRBCR_ALL_TISSUES.csv"
output_dir = "~/Dropbox/KI/Studies/LXXLZ/Website/10X/IMMUNE_PROFILING/Data/SAMPLE_DATA/OUTPUT/"
dir.create(output_dir, recursive = T)
Packages <- c("Seurat","monocle","SingleCellExperiment","ggthemes","plot3D","ggalluvial","dplyr","alluvial", "tcR","scRepertoire","immunarch","powerTCR","circlize","scales")
suppressMessages(lapply(Packages, library, character.only = TRUE))
source('~/Dropbox/KI/Studies/LXXLZ/Website/Web_Scripts/SCA_Analysis_Functions_V1.0.0.R')
# source('/mnt/SCA/PIPELINES/SCA_Analysis_Functions_V1.0.0.R')
# source('/proj/store_eudoraleer/PROJECTS/scRNASEQ/SCRIPTS/SCA_Analysis_Functions_V1.0.0.R')
color_conditions <- color_ini()
#####################################################################################################
contig_monarch_bak <- contig_monarch
# head(contig_monarch)
x$SIDCELLID <- paste(x$SID, x$CELL_ID, sep = "_")
x$SIDCELLID <- gsub("-|\\.","_",x$SIDCELLID)
for(i in 1:length(contig_monarch$data)){
c <- contig_monarch$data[[i]]
c$SID <- meta[match(names(contig_monarch$data)[i], meta$Sample),"SID"]
c$SIDCELLID <- paste(c$SID, gsub("(.*?);.*","\\1",c$Barcode), sep = "_")
c$SIDCELLID <- gsub("-|\\.","_",c$SIDCELLID)
c$SUBTISSUE <- meta[match(names(contig_monarch$data)[i], meta$Sample),"SUBTISSUE"]
c$TISSUE <- meta[match(names(contig_monarch$data)[i], meta$Sample),"TISSUE"]
c$CELL_SUBTYPE <- [email protected][match(c$SIDCELLID, x$SIDCELLID),"CELL_SUBTYPE"]
c$CELL_SUBTYPE[which(is.na(c$CELL_SUBTYPE))] <- ifelse([email protected][match(names(contig_monarch$data)[i], x$Sample),"CELL_SUBTYPE"] == "BCR","B-CELLS","T-CELLS")
contig_monarch$data[[i]] <- c
}
current <- meta_explore[meta_explore$CELL_TYPE == "TCR",]
current_meta <- [email protected][which([email protected]$Sample %in% current$Sample),]
current$SUBTISSUE <- paste([email protected][match(current$Sample, [email protected]$Sample),"TISSUE"], ":", current$SUBTISSUE, sep = "")
current_meta$SUBTISSUE <- paste([email protected][match(current_meta$Sample, [email protected]$Sample),"TISSUE"], ":", current_meta$SUBTISSUE, sep = "")
################################################################################################################
############################ By BCR / TCR separately ###########################################################
for(i in 1:length(contig_table)){
current_type <- contig_types[i]
current_single <- lengthContig(contig_table[[i]], cloneCall="aa", chains = "single", exportTable = TRUE,group = "CELL_SUBTYPE")
write.csv(current_single, paste(output_dir,"04.0SCA_TABLE_CLONOTYPE_ABUNDANCE_AA_SEPARATE_BY_CELL_SUBTYPES_",current_type,"_",project_name,".csv", sep = ""), quote = F, row.names = F)
current_combined <- lengthContig(contig_table[[i]], cloneCall="aa", chains = "combined", exportTable = TRUE,group = "CELL_SUBTYPE")
write.csv(current_combined, paste(output_dir,"04.0SCA_TABLE_CLONOTYPE_ABUNDANCE_AA_COMBINED_BY_CELL_SUBTYPES_",current_type,"_",project_name,".csv", sep = ""), quote = F, row.names = F)
current_single <- lengthContig(contig_table[[i]], cloneCall="nt", chains = "single", exportTable = TRUE,group = "CELL_SUBTYPE")
write.csv(current_single, paste(output_dir,"04.0SCA_TABLE_CLONOTYPE_ABUNDANCE_NUCLEOTIDES_SEPARATE_BY_CELL_SUBTYPES_",current_type,"_",project_name,".csv", sep = ""), quote = F, row.names = F)
current_combined <- lengthContig(contig_table[[i]], cloneCall="nt", chains = "combined", exportTable = TRUE,group = "CELL_SUBTYPE")
write.csv(current_combined, paste(output_dir,"04.0SCA_TABLE_CLONOTYPE_ABUNDANCE_NUCLEOTIDES_COMBINED_BY_CELL_SUBTYPES_",current_type,"_",project_name,".csv", sep = ""), quote = F, row.names = F)
}
######### SUBTISSUES CLONOTYPE FREQUENCY #########################################################
clonotypes_freq <- NULL
current_files <- list.files(path = output_dir, pattern = "^04.0SCA_TABLE_CLONOTYPE_ABUNDANCE_.*BY_CELL_SUBTYPES_.*csv", full.names = T)
tissues_kmer_summary <- NULL
for(i in 1:length(current_files)){
current_celltype <- NULL
current_datatype <- NULL
combined <- NULL
combined <- ifelse(gsub(".*(COMBINED).*","\\1",current_files[i]) == "COMBINED", T, F)
current <- readfile(current_files[i])
if(length(grep("BCR", current_files[i])) > 0){
current_celltype <- "BCR"
}else{
current_celltype <- "TCR"
}
if(combined == T){
colnames(current) <- c("LENGTH","SEQUENCE","CELL_SUBTYPE","SUBTISSUE")
}else{
colnames(current) <- c("LENGTH","SEQUENCE","SUBTISSUE","CHAIN","CELL_SUBTYPE")
}
current_datatype <- gsub(".*TABLE_CLONOTYPE_ABUNDANCE_(.*?)_.*","\\1",current_files[i])
current$LENGTH <- nchar(gsub("_","",current$SEQUENCE))
# temp <- data.frame(table(current[,c("LENGTH","SUBTISSUE")]))
# temp <- temp[!temp$Freq == 0,]
current <- split(current, current$SUBTISSUE)
current <- lapply(current, function(x){x <- split(x,x$CELL_SUBTYPE)})
for(k in 1:length(current)){
c <- current[[k]]
for(j in 1:length(c)){
temp <- c[[j]]
current_median <- NULL
if(combined == T){
current_median <- as.numeric(as.character(summary(temp$LENGTH)[grep("Median",names(summary(temp$LENGTH)))]))
current_frequency <- table(temp$LENGTH)/nrow(temp)
temp$FREQUENCY <- as.numeric(as.character(current_frequency[match(temp$LENGTH,names(current_frequency))]))
}else{
temp <- split(temp, temp$CHAIN)
temp <- lapply(temp, function(x){
current_frequency <- NULL
current_frequency <- table(x$LENGTH)/nrow(x)
x <- cbind(x, MEDIAN_LENGTH = as.numeric(summary(x$LENGTH)[grep("Median",names(summary(x$LENGTH)))]),
FREQUENCY = as.numeric(as.character(current_frequency[match(x$LENGTH,names(current_frequency))])))})
temp <- do.call(rbind.data.frame, temp)
}
if(combined == T){
temp2 <- data.frame(
SUBTISSUE = unique(temp$SUBTISSUE),
CELL_SUBTYPE = unique(temp$CELL_SUBTYPE),
DATA_TYPE = paste(current_celltype, "_", current_datatype, sep = ""),
CHAIN = "COMBINE",
STATUS = ifelse(combined == T, "COMBINED","SINGLE"),
MEDIAN_LENGTH = current_median)
}else{
temp2 <- cbind(
SUBTISSUE = unique(temp$SUBTISSUE),
CELL_SUBTYPE = unique(temp$CELL_SUBTYPE),
DATA_TYPE = paste(current_celltype, "_", current_datatype, sep = ""),
STATUS = ifelse(combined == T, "COMBINED","SINGLE"),
data.frame(unique(temp[,c("CHAIN","MEDIAN_LENGTH")])
))
}
tissues_kmer_summary <- rbind(tissues_kmer_summary,temp2)
c[[j]] <- temp
}
current[[k]] <- do.call(rbind.data.frame, c)
}
current <- do.call(rbind.data.frame, current)
dir.create(paste(output_dir,"/05.0CLONOTYPE_ABUNDANCE_CELL_SUBTYPES/", sep = ""))
write.csv(current, paste(output_dir,"/05.0CLONOTYPE_ABUNDANCE_CELL_SUBTYPES/WITH_FREQUENCY_INFO_CELL_SUBTYPES_",gsub(".*\\/(.*)","\\1",current_files[i]), sep = ""), quote=F, row.names = F)
write.csv(tissues_kmer_summary, paste(output_dir,"/05.0CLONOTYPE_ABUNDANCE_CELL_SUBTYPES/SUBTISSUES_KMER_MEDIAN_LENGTH_SUMMARY_CELL_SUBTYPES_",gsub(".*\\/(.*)","\\1",current_files[i]), sep = ""), quote=F, row.names = F)
# current$SUBTISSUE <- factor(current$SUBTISSUE, levels = sort(unique(current$SUBTISSUE)))
tissues <- sort(unique(current$SUBTISSUE))
for(l in 1:length(tissues)){
current_file_name <- gsub(".*\\/(.*).csv",paste(output_dir,"/05.0CLONOTYPE_ABUNDANCE_CELL_SUBTYPES/",tissues[l],"_\\1.png",sep = ""),current_files[i])
somePNGPath = current_file_name
png(somePNGPath, width=5000, height=4000, units = "px", res = 300)
p1 <- ggplot(current[which(current$SUBTISSUE == tissues[l]),],aes(LENGTH,FREQUENCY,fill=CELL_SUBTYPE))+
geom_bar(stat = "identity", position = "dodge")+ #, color = "black", size = 1
theme_classic()+
# facet_wrap(~CELL_SUBTYPE)+
# scale_x_continuous(breaks = seq(0, max(current$LENGTH)+2, by = 5))+
theme(plot.margin = unit(c(1,1,1,1), "cm"),
axis.text.x = element_text(angle = 0, size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 25, margin=margin(10,0,0,0)),
axis.title.y = element_text(size = 25, margin=margin(0,10,0,0)),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
# legend.key.size = unit(1, "cm"),
legend.position = "bottom",
strip.text.x = element_text(size = 25),
strip.text.y = element_text(size = 25),
plot.title = element_text(size = 20, face=1, hjust = 0.5))+
scale_fill_manual(values = gen_colors(color_conditions$bright, length(unique(current$SUBTISSUE))))+
guides(color=guide_legend(title="SUBTISSUES", ncol = 3),fill = guide_legend(title="SUBTISSUES", ncol = 3))
if(combined == F){
p1 <- p1 + facet_wrap(~CHAIN, ncol=1)
}
print(p1+plot_annotation(title = paste(ifelse(combined==T,"COMBINED ",""), tissues[l]," ",current_celltype, ":CDR3 ",current_datatype," DISTRIBUTION", sep = ""), theme = theme(plot.title = element_text(size = 30, face = "bold", hjust = 0.5))))
dev.off()
# current$SUBTISSUE <- factor(current$SUBTISSUE, levels = rev(sort(unique(current$SUBTISSUE))))
current_file_name <- gsub(".*\\/(.*).csv",paste(output_dir,"/05.0CLONOTYPE_ABUNDANCE_CELL_SUBTYPES/",tissues[l],"_\\1_BOXPLOT.png",sep = ""),current_files[i])
somePNGPath = current_file_name
png(somePNGPath, width=5000, height=5000, units = "px", res = 300)
p2 <- ggplot(current[which(current$SUBTISSUE == tissues[l]),],aes(LENGTH,CELL_SUBTYPE,fill=CELL_SUBTYPE))+ geom_boxplot()+ theme_classic()+
# scale_x_continuous(breaks = seq(0, max(current$LENGTH)+2, by = 5))+
theme(plot.margin = unit(c(1,1,1,1), "cm"),
axis.text.x = element_text(angle = 0, size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 25, margin=margin(10,0,0,0)),
axis.title.y = element_text(size = 25, margin=margin(0,10,0,0)),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
# legend.key.size = unit(1, "cm"),
legend.position = "none",
strip.text.x = element_text(size = 25),
strip.text.y = element_text(size = 25),
plot.title = element_text(size = 20, face=1, hjust = 0.5))+
scale_fill_manual(values = gen_colors(color_conditions$bright, length(unique(current$SUBTISSUE))))+
guides(color=guide_legend(title="SUBTISSUES", ncol = 3),fill = guide_legend(title="SUBTISSUES", ncol = 3))
if(combined == F){
p2 <- p2 + facet_wrap(~CHAIN, ncol=1)
}
print(p2+plot_annotation(title = paste(ifelse(combined==T,"COMBINED ",""), tissues[l]," ",current_celltype, ":CDR3 ",current_datatype," DISTRIBUTION", sep = ""), theme = theme(plot.title = element_text(size = 30, face = "bold", hjust = 0.5))))
dev.off()
library(ggridges)
# current$SUBTISSUE <- factor(current$SUBTISSUE, levels = sort(unique(current$SUBTISSUE)))
current_file_name <- gsub(".*\\/(.*).csv",paste(output_dir,"/05.0CLONOTYPE_ABUNDANCE_CELL_SUBTYPES/",tissues[l],"_\\1_RIDGE.png",sep = ""),current_files[i])
somePNGPath = current_file_name
png(somePNGPath, width=5000, height=5000, units = "px", res = 300)
p2 <- ggplot(current[which(current$SUBTISSUE == tissues[l]),],aes(LENGTH,CELL_SUBTYPE,fill=CELL_SUBTYPE))+ geom_density_ridges()+ theme_classic()+
# scale_x_continuous(breaks = seq(0, max(current$LENGTH)+2, by = 5))+
theme(plot.margin = unit(c(1,1,1,1), "cm"),
axis.text.x = element_text(angle = 0, size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 25, margin=margin(10,0,0,0)),
axis.title.y = element_text(size = 25, margin=margin(0,10,0,0)),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
# legend.key.size = unit(1, "cm"),
legend.position = "none",
strip.text.x = element_text(size = 25),
strip.text.y = element_text(size = 25),
plot.title = element_text(size = 20, face=1, hjust = 0.5))+
scale_fill_manual(values = gen_colors(color_conditions$bright, length(unique(current$SUBTISSUE))))+
guides(color=guide_legend(title="SUBTISSUES", ncol = 3),fill = guide_legend(title="SUBTISSUES", ncol = 3))
if(combined == F){
p2 <- p2 + facet_wrap(~CHAIN, ncol=1)
}
print(p2+plot_annotation(title = paste(ifelse(combined==T,"COMBINED ",""), tissues[l]," ",current_celltype, ":CDR3 ",current_datatype," DISTRIBUTION", sep = ""), theme = theme(plot.title = element_text(size = 30, face = "bold", hjust = 0.5))))
dev.off()
}
}
#### ALL WRONG!!!!! scRepertoire Package Bug
# abundance_contig <- NULL
# abundance_names <- NULL
# clonecall_types
# k <- 1
# for(i in 1:length(clonecall_types)){
# for(j in 1:length(cell_types)){
# abundance_contig[[k]] <- abundanceContig(contig_table[[j]], cloneCall = clonecall_types[i], group = c("CELL_SUBTYPE","TISSUE"),exportTable = T)
# abundance_contig[[k]] <- abundance_contig[[k]][order(abundance_contig[[k]]$Abundance, decreasing = T),]
# write.csv(abundance_contig[[k]], paste(output_dir,"scIMMUNE_CLONOTYPES_ABUNDANCE_TISSUE_",cell_types[j],"_",toupper(clonecall_types[i]),"_",current_type,".csv", sep = ""), quote = F, row.names = F)
# abundance_names <- c(abundance_names, paste(cell_types[j],"_",toupper(clonecall_types[i]),"_",current_type, sep = ""))
# k <- k+1
# }
# }
abundance_contig <- NULL
abundance_names <- NULL
clonecall_cols <- c("CTgene","CTnt","CTstrict","CTaa")
clonecall_cols
clonecall_types
m <- 1
for(i in 1:length(clonecall_types)){
for(j in 1:length(cell_types)){
current <- contig_table[[j]]
for(k in 1:length(current)){
current[[k]] <- data.frame(table(current[[k]][,c(clonecall_cols[i],"SUBTISSUE","CELL_SUBTYPE")]))
}
current <- do.call(rbind.data.frame, current)
abundance_contig[[m]] <- current
abundance_names <- c(abundance_names, paste(cell_types[j],"_",toupper(clonecall_types[i]),sep = ""))
write.csv(abundance_contig[[m]], paste(output_dir,"scIMMUNE_CLONOTYPES_ABUNDANCE_TISSUE_",cell_types[j],"_",toupper(clonecall_types[i]),".csv", sep = ""), quote = F, row.names = F)
m <- m+1
}
}
saveRDS(abundance_contig,"~/Desktop/abundance_contig_5.2SCA_PLOT_ALLUVIAL.RDS")
dir.create("~/Dropbox/KI/Studies/LXXLZ/Website/10X/IMMUNE_PROFILING/Data/SAMPLE_DATA/OUTPUT/5.2SCA_PLOT_ALLUVIAL/")
abundance_contig <- readRDS("~/Desktop/abundance_contig_5.2SCA_PLOT_ALLUVIAL.RDS")
n <- 10
for(k in 1:length(abundance_contig)){
current <- split(abundance_contig[[k]], abundance_contig[[k]]$SUBTISSUE)
for(j in 1:length(current)){
temp <- current[[j]]
temp <- temp[temp[grep("freq", colnames(temp), ignore.case = T)] > 0,]
temp$PROPORTION <- temp[,grep("freq", colnames(temp), ignore.case = T)]/sum(temp[,grep("freq", colnames(temp), ignore.case = T)])
temp <- split(temp, temp$CELL_SUBTYPE)
temp <- temp[unlist(lapply(temp, nrow)) > 0]
temp <- lapply(temp, function(y){
y <- y[order(y$PROPORTION, decreasing = T),]
y <- y[1:ifelse(nrow(y)<n, nrow(y), n),]
})
temp <- do.call(rbind.data.frame, temp)
current[[j]] <- temp
temp <- NULL
}
current <- do.call(rbind.data.frame,current)
abundance_contig[[k]] <- current
current <- split(current, current$CELL_SUBTYPE)
current <- current[unlist(lapply(current, nrow)) > 0]
for(l in 1:length(current)){
plotx <- current[[l]]
cname <- colnames(plotx)[grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)]
cct <- as.character(unique(plotx$CELL_SUBTYPE))
colnames(plotx)[grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)] <- "CLONE_TYPE"
library(reshape2)
plotx <- dcast(plotx, CLONE_TYPE~SUBTISSUE, value.var = "PROPORTION")
plotx[is.na(plotx)] <- 0
write.table(plotx,paste(output_dir,"5.2SCA_PLOT_ALLUVIAL/5.2SCA_PLOT_ALLUVIAL_",gsub("\\s+|\\/","_",cct), "_",abundance_names[k],"_TOP_",n,"_MOST_ABUNDANT_CLONOTYPES_",cname,".txt", sep = ""), row.names = F, quote = F, sep = "\t")
class(plotx) <- c("immunr_dynamics","data.table","data.frame")
p1 <- vis(plotx)+ggtitle(paste(cct, ": ",gsub("(.*?)_(.*)","\\1",abundance_names[k]), " TOP ",n," CLONOTYPES (",cname,")", sep = ""))+
scale_fill_manual(values = gen_colors(color_conditions$colorful, nrow(plotx)))+
theme(plot.margin = unit(c(2,2,2,2), "cm"),
axis.text.x = element_text(angle = 45,hjust=1,vjust = 1,size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 25, margin=margin(10,0,0,0)),
axis.title.y = element_text(size = 25, margin=margin(0,10,0,0)),
legend.title = element_text(size = 25),
legend.text = element_text(size = 15),
legend.key.size = unit(1, "cm"),
strip.text.x = element_text(size = 15),
plot.title = element_text(size = 20, face=2),
strip.background = element_rect(colour="white", fill="white"),
legend.position = "none")+guides(color=guide_legend(title="SUBTISSUE", ncol = 1),fill = guide_legend(ncol = 1))+xlab("SUB-TISSUES")+ylab("PROPORTION")
somePNGPath = paste(output_dir,"5.2SCA_PLOT_ALLUVIAL/5.2SCA_PLOT_ALLUVIAL_",gsub("\\s+|\\/","_",cct), "_",abundance_names[k],"_TOP_",n,"_MOST_ABUNDANT_CLONOTYPES_",cname,".png", sep = "")
png(somePNGPath, width=5000, height=3000, units = "px", res = 300)
print(p1)
dev.off()
}
}
saveRDS(abundance_contig, "~/Desktop/abundance_contig_subcelltypes_5.2SCA_PLOT_ALLUVIAL.RDS")
abundance_contig <- readRDS("~/Desktop/abundance_contig_5.2SCA_PLOT_ALLUVIAL.RDS")
clonecall_cols
n <- 10
for(k in 1:length(abundance_contig)){
current <- abundance_contig[[k]]
current$CELL_SUBTYPE <- gsub(".*B-CELL.*","B-CELL",current$CELL_SUBTYPE, ignore.case = T)
current$CELL_SUBTYPE <- gsub(".*T-CELL.*","T-CELL",current$CELL_SUBTYPE, ignore.case = T)
current <- split(current, current$SUBTISSUE)
for(j in 1:length(current)){
temp <- current[[j]]
temp <- temp[temp[grep("freq", colnames(temp), ignore.case = T)] > 0,]
temp$PROPORTION <- temp[,grep("freq", colnames(temp), ignore.case = T)]/sum(temp[,grep("freq", colnames(temp), ignore.case = T)])
temp <- split(temp, temp$CELL_SUBTYPE)
temp <- temp[unlist(lapply(temp, nrow)) > 0]
temp <- lapply(temp, function(y){
y <- y[order(y$PROPORTION, decreasing = T),]
y <- y[1:ifelse(nrow(y)<n, nrow(y), n),]
})
temp <- do.call(rbind.data.frame, temp)
current_clones <- as.character(unique(temp[,colnames(temp)[grep("SUBTISSUE|CELL_SUBTYPE|Freq|PROPORTION",colnames(temp), ignore.case = T, invert = T)]]))
temp2 <- NULL
for(i in 1:length(current_clones)){
currentcurrent <- temp[which(temp[,colnames(temp)[grep("SUBTISSUE|CELL_SUBTYPE|Freq|PROPORTION",colnames(temp), ignore.case = T, invert = T)]] == current_clones[i]),]
current_name <- colnames(currentcurrent)[grep("SUBTISSUE|CELL_SUBTYPE|Freq|PROPORTION",colnames(currentcurrent), ignore.case = T, invert = T)]
currentcurrent <- data.frame(unique(currentcurrent[which(currentcurrent[,current_name] == current_clones[i]),c(current_name,"SUBTISSUE","CELL_SUBTYPE")]), Freq = sum(currentcurrent$Freq), PROPORTION = sum(currentcurrent$PROPORTION))
temp2 <- rbind(temp2,currentcurrent)
}
temp <- temp2
rm(temp2)
current[[j]] <- temp
temp <- NULL
}
current <- do.call(rbind.data.frame,current)
abundance_contig[[k]] <- current
current <- split(current, current$CELL_SUBTYPE)
current <- current[unlist(lapply(current, nrow)) > 0]
for(l in 1:length(current)){
plotx <- current[[l]]
cname <- colnames(plotx)[grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)]
cct <- as.character(unique(plotx$CELL_SUBTYPE))
colnames(plotx)[grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)] <- "CLONE_TYPE"
library(reshape2)
plotx <- dcast(plotx, CLONE_TYPE~SUBTISSUE, value.var = "PROPORTION")
plotx[is.na(plotx)] <- 0
write.table(plotx,paste(output_dir,"5.2SCA_PLOT_ALLUVIAL/5.2SCA_PLOT_ALLUVIAL_",gsub("\\s+|\\/","_",cct), "_",abundance_names[k],"_TOP_",n,"_MOST_ABUNDANT_CLONOTYPES_",cname,".txt", sep = ""), row.names = F, quote = F, sep = "\t")
class(plotx) <- c("immunr_dynamics","data.table","data.frame")
p1 <- vis(plotx)+ggtitle(paste(cct, ": ",gsub("(.*?)_(.*)","\\1",abundance_names[k]), " TOP ",n," CLONOTYPES (",cname,")", sep = ""))+
scale_fill_manual(values = gen_colors(color_conditions$colorful, nrow(plotx)))+
theme(plot.margin = unit(c(2,2,2,2), "cm"),
axis.text.x = element_text(angle = 45,hjust=1,vjust = 1,size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 25, margin=margin(10,0,0,0)),
axis.title.y = element_text(size = 25, margin=margin(0,10,0,0)),
legend.title = element_text(size = 25),
legend.text = element_text(size = 15),
legend.key.size = unit(1, "cm"),
strip.text.x = element_text(size = 15),
plot.title = element_text(size = 20, face=2),
strip.background = element_rect(colour="white", fill="white"),
legend.position = "none")+guides(color=guide_legend(title="SUBTISSUE", ncol = 1),fill = guide_legend(ncol = 1))+xlab("SUB-TISSUES")+ylab("PROPORTION")
somePNGPath = paste(output_dir,"5.2SCA_PLOT_ALLUVIAL/5.2SCA_PLOT_ALLUVIAL_",gsub("\\s+|\\/","_",cct), "_",abundance_names[k],"_TOP_",n,"_MOST_ABUNDANT_CLONOTYPES_",cname,".png", sep = "")
png(somePNGPath, width=5000, height=3000, units = "px", res = 300)
print(p1)
dev.off()
}
}
saveRDS(abundance_contig, "~/Desktop/abundance_contig_TBcells_5.2SCA_PLOT_ALLUVIAL.RDS")
# for(j in 1:length(current)){
# temp <- split(current[[j]], current[[j]]$CELL_SUBTYPE)
# temp <- temp[unlist(lapply(temp, nrow)) > 0]
# temp <- lapply(temp, function(y){
# y <- data.frame(y, PROPORTION = y[,grep("freq", colnames(y), ignore.case = T)]/sum(y[,grep("freq", colnames(y), ignore.case = T)]))
# })
# temp <- do.call(rbind.data.frame, temp)
# }
# temp <- temp[temp$PROPORTION > 0,]
# }
library(reshape2)
dir.create("~/Dropbox/KI/Studies/LXXLZ/Website/10X/IMMUNE_PROFILING/Data/SAMPLE_DATA/OUTPUT/5.5SCA_PLOT_ALLUVIAL_TOP10_EACH_CELL_SUBTYPE")
abundance_contig <- readRDS("~/Desktop/abundance_contig_subcelltypes_5.2SCA_PLOT_ALLUVIAL.RDS")
n <- 10
for(k in 1:length(abundance_contig)){
current <- abundance_contig[[k]]
cell_types <- as.character(sort(unique(current$CELL_SUBTYPE)))
for(j in 1:length(cell_types)){
plotx <- current[current$CELL_SUBTYPE == cell_types[j],]
plotx <- plotx[order(plotx$Freq, decreasing = T),]
ctop <- unique(plotx[,grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)])[1:ifelse(length(unique(plotx[,grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)])) < n, length(unique(plotx[,grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)])), n)]
plotx <- plotx[which(plotx[,grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)] %in% ctop),]
cname <- colnames(plotx)[grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)]
cct <- as.character(unique(plotx$CELL_SUBTYPE))
colnames(plotx)[grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)] <- "CLONE_TYPE"
plotx <- dcast(plotx, CLONE_TYPE~SUBTISSUE, value.var = "PROPORTION")
plotx[is.na(plotx)] <- 0
plotx$CLONE_TYPE <- gsub("^NA;|;NA$","",plotx$CLONE_TYPE)
plotx$CLONE_TYPE <- gsub(";NA;",";",plotx$CLONE_TYPE)
plotx$CLONE_TYPE <- gsub("^None;|;None$","",plotx$CLONE_TYPE, ignore.case = T)
plotx$CLONE_TYPE <- gsub(";NONE;",";",plotx$CLONE_TYPE, ignore.case = T)
write.table(plotx,paste(output_dir,"5.5SCA_PLOT_ALLUVIAL_TOP10_EACH_CELL_SUBTYPE/5.5SCA_PLOT_ALLUVIAL_TOP10_EACH_CELL_SUBTYPE_",gsub("\\s+|\\/","_",cct), "_",abundance_names[k],"_TOP_",n,"_MOST_ABUNDANT_CLONOTYPES_",cname,".txt", sep = ""), row.names = F, quote = F, sep = "\t")
class(plotx) <- c("immunr_dynamics","data.table","data.frame")
p1 <- vis(plotx)+ggtitle(paste(cct, " ONLY: ",gsub("(.*?)_(.*)","\\1",abundance_names[k]), " TOP ",n," CLONOTYPES (",cname,")", sep = ""))+
scale_fill_manual(values = gen_colors(color_conditions$colorful, nrow(plotx)))+
theme(plot.margin = unit(c(2,2,2,2), "cm"),
axis.text.x = element_text(angle = 45,hjust=1,vjust = 1,size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 25, margin=margin(10,0,0,0)),
axis.title.y = element_text(size = 25, margin=margin(0,10,0,0)),
legend.title = element_text(size = 20),
legend.text = element_text(size = ifelse(length(grep("CTStrict|Ctnt", cname, ignore.case = T)) > 0, 10,14)),
legend.key.size = unit(1, "cm"),
strip.text.x = element_text(size = 15),
plot.title = element_text(size = 30, face=2),
strip.background = element_rect(colour="white", fill="white"),
legend.position = "bottom")+
guides(color=guide_legend(title="SUBTISSUE", ncol = 2),fill = guide_legend(ncol = ifelse(length(grep("CTStrict|Ctnt", cname, ignore.case = T)) > 0, 1,2)))+xlab("SUB-TISSUES")+
ylab("PROPORTION")
somePNGPath = paste(output_dir,"5.5SCA_PLOT_ALLUVIAL_TOP10_EACH_CELL_SUBTYPE/5.5SCA_PLOT_ALLUVIAL_TOP10_EACH_CELL_SUBTYPE_",gsub("\\s+|\\/","_",cct), "_",abundance_names[k],"_TOP_",n,"_MOST_ABUNDANT_CLONOTYPES_",cname,".png", sep = "")
png(somePNGPath, width=6000, height=ifelse(length(grep("CTStrict|Ctnt", cname, ignore.case = T)) > 0, 6000,4000), units = "px", res = 300)
print(p1)
dev.off()
}
}
abundance_contig <- readRDS("~/Desktop/abundance_contig_TBcells_5.2SCA_PLOT_ALLUVIAL.RDS")
n <- 10
for(k in 1:length(abundance_contig)){
current <- abundance_contig[[k]]
cell_types <- as.character(sort(unique(current$CELL_SUBTYPE)))
for(j in 1:length(cell_types)){
plotx <- current[current$CELL_SUBTYPE == cell_types[j],]
plotx <- plotx[order(plotx$Freq, decreasing = T),]
ctop <- unique(plotx[,grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)])[1:ifelse(length(unique(plotx[,grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)])) < n, length(unique(plotx[,grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)])), n)]
plotx <- plotx[which(plotx[,grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)] %in% ctop),]
cname <- colnames(plotx)[grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)]
cct <- as.character(unique(plotx$CELL_SUBTYPE))
colnames(plotx)[grep(paste(clonecall_cols,collapse = "|"), colnames(plotx), ignore.case = T)] <- "CLONE_TYPE"
plotx <- dcast(plotx, CLONE_TYPE~SUBTISSUE, value.var = "PROPORTION")
plotx[is.na(plotx)] <- 0
plotx$CLONE_TYPE <- gsub("^NA;|;NA$","",plotx$CLONE_TYPE)
plotx$CLONE_TYPE <- gsub(";NA;",";",plotx$CLONE_TYPE)
plotx$CLONE_TYPE <- gsub("^None;|;None$","",plotx$CLONE_TYPE, ignore.case = T)
plotx$CLONE_TYPE <- gsub(";NONE;",";",plotx$CLONE_TYPE, ignore.case = T)
write.table(plotx,paste(output_dir,"5.5SCA_PLOT_ALLUVIAL_TOP10_EACH_CELL_SUBTYPE/5.5SCA_PLOT_ALLUVIAL_TOP10_EACH_CELL_SUBTYPE_",gsub("\\s+|\\/","_",cct), "_",abundance_names[k],"_TOP_",n,"_MOST_ABUNDANT_CLONOTYPES_",cname,".txt", sep = ""), row.names = F, quote = F, sep = "\t")
class(plotx) <- c("immunr_dynamics","data.table","data.frame")
p1 <- vis(plotx)+ggtitle(paste(gsub("CELL$","CELLS",cct, ignore.case = T), " ONLY: ",gsub("(.*?)_(.*)","\\1",abundance_names[k]), " TOP ",n," CLONOTYPES (",cname,")", sep = ""))+
scale_fill_manual(values = gen_colors(color_conditions$colorful, nrow(plotx)))+
theme(plot.margin = unit(c(2,2,2,2), "cm"),
axis.text.x = element_text(angle = 45,hjust=1,vjust = 1,size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 25, margin=margin(10,0,0,0)),
axis.title.y = element_text(size = 25, margin=margin(0,10,0,0)),
legend.title = element_text(size = 20),
legend.text = element_text(size = ifelse(length(grep("CTStrict|Ctnt", cname, ignore.case = T)) > 0, 10,14)),
legend.key.size = unit(1, "cm"),
strip.text.x = element_text(size = 15),
plot.title = element_text(size = 30, face=2),
strip.background = element_rect(colour="white", fill="white"),
legend.position = "bottom")+
guides(color=guide_legend(title="SUBTISSUE", ncol = 2),fill = guide_legend(ncol = ifelse(length(grep("CTStrict|Ctnt", cname, ignore.case = T)) > 0, 1,2)))+xlab("SUB-TISSUES")+
ylab("PROPORTION")
somePNGPath = paste(output_dir,"5.5SCA_PLOT_ALLUVIAL_TOP10_EACH_CELL_SUBTYPE/5.5SCA_PLOT_ALLUVIAL_TOP10_EACH_CELL_SUBTYPE_",gsub("\\s+|\\/","_",cct), "_",abundance_names[k],"_TOP_",n,"_MOST_ABUNDANT_CLONOTYPES_",cname,".png", sep = "")
png(somePNGPath, width=6000, height=ifelse(length(grep("CTStrict|Ctnt", cname, ignore.case = T)) > 0, 6000,4000), units = "px", res = 300)
print(p1)
dev.off()
}
}
# for(k in 1:length(abundance_contig)){
# current <- abundance_contig[[k]]
# cell_types <- unique(current$CELL_SUBTYPE)
# for(j in 1:length(cell_types)){
# plotx <- current[which(current$CELL_SUBTYPE == cell_types[j]),]
# plotx <- dcast(plotx, CTaa ~SUBTISSUE, value.var = "PROPORTION")
# plotx[is.na(plotx)] <- 0
# row.names(plotx) <- plotx$CTaa
# plotx <- plotx[,grep("CTaa",colnames(plotx),invert = T)]
# plotx <- scale(plotx)
# plotx <- t(scale(t(plotx)))
# heatmap.2(as.matrix(t(plotx)), trace="none",key=T, keysize=1,
# col=jet2.col (n = 100, alpha = 1),srtCol=45, scale="none", Colv = T,
# Rowv = T,
# density.info="none", cexCol=0.8,cexRow=0.8)
#
# }
#
#
# }
######## Find top genes and do pathway analysis
rnaseq_coords$CELL_ID
table(x$TCRBCR)
head(rnaseq_coords)
dim(rnaseq_coords)
which(paste(rnaseq_coords$SAMPLE_ID,rnaseq_coords$CELL_ID, sep = "") %in%
paste(x$SAMPLE_ID,x$CELL_ID, sep = ""))
mapp_ref <- data.frame(SID = paste(rnaseq_coords$SAMPLE_ID,rnaseq_coords$CELL_ID, sep = "_"),
CELL_SUBTYPE = [email protected][match(paste(rnaseq_coords$SAMPLE_ID,rnaseq_coords$CELL_ID, sep = ""), paste(x$SAMPLE_ID,x$CELL_ID, sep = "")),"CELL_SUBTYPE"])
mapp_ref$CELL_SUBTYPE[which(is.na(mapp_ref$CELL_SUBTYPE))] <- "OTHERS"
saveRDS(mapp_ref, "~/Dropbox/KI/Studies/LXXLZ/Website/10X/IMMUNE_PROFILING/Data/SAMPLE_DATA/OUTPUT/Original_cell_order_final_tcrbcr_subtypes_20210725.RDS")
row.names([email protected]) <- colnames(x)
# x2 <- subset(x, subset = TCRBCR != "B-CELLS")
# x2 <- subset(x2, subset = TCRBCR != "OTHER CELLS")
rnaseq_coords$CELL_TYPE <- [email protected][match(paste(rnaseq_coords$CELL_ID,rnaseq_coords$SAMPLE_ID),paste(x$CELL_ID,x$SAMPLE_ID)),"CELL_SUBTYPE"]
rnaseq_coords[which(is.na(rnaseq_coords$CELL_TYPE)),"CELL_TYPE"] <- "TO_REMOVE"
saveRDS(rnaseq_coords,"~/Dropbox/KI/Studies/LXXLZ/Website/10X/IMMUNE_PROFILING/Data/SAMPLE_DATA/OUTPUT/RDATA_RDS/FINAL_TCRBCR_CELL_SUBTYPES_RNASEQ_COORDS.RDS")
# data_integrated <- subset(data_integrated, subset = CELL_SUBTYPE != "OTHERS")
# data_integrated <- subset(data_integrated, subset = CELL_TYPE != "TO_REMOVE")
#### Top markers: Cell Types for Each Tissue
celltypes_tissues <- data.frame(table([email protected][,c("TISSUE","CELL_SUBTYPE")]))
plotx <- reshape2::dcast(celltypes_tissues, TISSUE ~ CELL_SUBTYPE, value.var = "Freq")
write.table(plotx, "~/Dropbox/KI/Studies/LXXLZ/Website/10X/IMMUNE_PROFILING/Data/SAMPLE_DATA/OUTPUT/FINAL_CELL_TYPE_CELL_NUM_TISSUE_WISE.txt", quote = F, row.names = F, sep = "\t")
celltypes_tissues <- celltypes_tissues[celltypes_tissues$Freq > 0,]
celltypes_tissues$ID <- paste(celltypes_tissues$TISSUE, celltypes_tissues$CELL_SUBTYPE, sep = ":")
to_remove <- celltypes_tissues[which(celltypes_tissues$Freq <= 100),"ID"]
# plotx <- reshape2::dcast(celltypes_tissues, TISSUE ~ CELL_SUBTYPE, value.var = "Freq")
# row.names(plotx) <- plotx$TISSUE
# plotx <- plotx[,grep("^TISSUE$", colnames(plotx), ignore.case = T, invert = T)]
# heatmap.2(as.matrix(t(plotx)), trace="none",key=T, keysize=1,
# col=jet2.col (n = 100, alpha = 1),srtCol=45, scale="none", Colv = T,
# Rowv = T,
# density.info="none", cexCol=0.8,cexRow=0.8)
files <- list.files(path = "~/Dropbox/KI/Studies/LXXLZ/Website/10X/IMMUNE_PROFILING/Data/SAMPLE_DATA/OUTPUT/RDATA_RDS/",
pattern = "_FindAllMarkers_Cell_Type_Result_RNA_Assay_Integrated_Data.RDS", full.names = T)
files <- files[grep("TCRBCR_FindAllMarkers_Cell_Type_Result_RNA_Assay_Integrated_Data", files, ignore.case = T, invert = T)]
for(j in 1:length(files)){
cname <- gsub(".*\\/(.*)_FindAllMarkers_Cell_Type_Result_RNA_Assay_Integrated_Data.RDS","\\1",files[j])
current_markers <- readRDS(files[j])
current_markers <- current_markers[current_markers$p_val_adj < 0.05,]
current_markers <- current_markers[order(current_markers$avg_log2FC, decreasing = T),]
current_markers <- current_markers[which(!paste(cname,current_markers$cluster, sep = ":") %in% to_remove),]
write.table(current_markers, paste(output_dir,"FILTERED_P005_LOW_CELL_NUM_100_TISSUE_",cname,"_FindAllMarkers.txt", sep = ""), quote = F, row.names = F, sep = "\t")
n <- 50
topn <- split(current_markers, current_markers$cluster)
topn <- lapply(topn, function(x){
x <- x[order(x$avg_log2FC, decreasing = T),]
x <- x[1:n,]
})
topn <- do.call(rbind.data.frame, topn)
n <- 200
p_threshold <- 0.01
current_markers <- current_markers[which(current_markers$p_val_adj < p_threshold),]
clusters <- unique(current_markers$cluster)
current_go <- paste(output_dir, "/TOP_",n,"_GENES_GO_TERMS_TISSUE_CELLTYPES/", sep = "")
dir.create(current_go)
current_kegg <- paste(output_dir, "/TOP_",n,"_GENES_KEGG_PATHWAYS_TISSUE_CELLTYPES/", sep = "")
dir.create(current_kegg)
for(i in 1:length(clusters)){
current <- current_markers[which(current_markers$cluster == clusters[i]),]
current <- current[order(current$avg_log2FC, decreasing = T),]
current <- data.frame(SYMBOL = unique(current[,"gene"])[1:ifelse(nrow(current)<n, nrow(current),n)])
gene_col_provided <- "SYMBOL"
entrez_out <- select(org.Hs.eg.db, keys = current$SYMBOL, keytype = 'SYMBOL', columns = 'ENTREZID')
current$ENTREZID <- entrez_out[match(current$SYMBOL, entrez_out$SYMBOL),"ENTREZID"]
current <- current[which(!is.na(current$ENTREZID)),]
go <- goana(current$ENTREZID, species="Hs")
go <- go[go$P.DE < p_threshold,]
go <- go[order(go$P.DE, decreasing = F),]
write.csv(go, paste(current_go,"/",cname,"_",gsub("\\/","_",clusters[i]),"_TOP_",n,"_GENES_GO_TERMS_P",p_threshold,".csv",sep = ""), quote = F, row.names = T)
n <- 50
TOP_GO <- topGO(go, ontology =c("BP","CC","MF"), n=n)
TOP_GO <- TOP_GO[order(TOP_GO$P.DE, decreasing = F),]
TOP_GO$Term <- factor(TOP_GO$Term, levels = rev(unique(TOP_GO$Term)))
TOP_GO$NegLogP <- -log(TOP_GO$P.DE)
write.csv(TOP_GO, paste(current_go,"/",cname,"_",gsub("\\/","_",clusters[i]),"_TOP_",n,"_GENES_GO_BPCCMF_TERMS_P",p_threshold,".csv",sep = ""), quote = F, row.names = T)
somePNGPath <- paste(current_go,"/",cname,"_",gsub("\\/","_",clusters[i]),"_TOP_",n,"_GENES_GO_BPCCMF_TERMS_P",p_threshold,".png",sep = "")
png(somePNGPath, width = 4000, height =2000, units = "px", res = 300)
print(ggplot(TOP_GO, aes(NegLogP, Term, color = Term))+
geom_point(size = 3, alpha = 0.8, stroke = 1)+
theme_classic()+theme(legend.position = "none",
title = element_text(face = 2, size = 12)) +
ylab("GO TERMS")+
ggtitle(paste(gsub("_"," ",cname), " - TOP GO TERMS: ",gsub("_"," ",clusters[i]),sep="")))
dev.off()
keg <- kegga(current$ENTREZID, species="Hs")
keg <- keg[keg$P.DE < p_threshold,]
keg <- keg[order(keg$P.DE, decreasing = F),]
write.csv(keg, paste(current_kegg,"/",cname,"_",gsub("\\/","_",clusters[i]),"_TOP_",n,"_GENES_KEGG_PATHWAYS_P",p_threshold,".csv",sep = ""), quote = F, row.names = T)
n <- 50
TOP_KEGG <- topKEGG(keg, n=n)
write.csv(TOP_KEGG, paste(current_kegg,"/",cname,"_",gsub("\\/","_",clusters[i]),"_TOP_",n,"_GENES_KEGG_PATHWAYS_P",p_threshold,".csv",sep = ""), quote = F, row.names = T)
TOP_KEGG <- TOP_KEGG[order(TOP_KEGG$P.DE, decreasing = F),]
TOP_KEGG$Pathway <- factor(TOP_KEGG$Pathway, levels = rev(unique(TOP_KEGG$Pathway)))
TOP_KEGG$NegLogP <- -log(TOP_KEGG$P.DE)
somePNGPath <- paste(current_kegg,"/",cname,"_",gsub("\\/","_",clusters[i]),"_TOP_",n,"_GENES_KEGG_PATHWAYS_P",p_threshold,".png",sep = "")
png(somePNGPath, width = 4000, height =2000, units = "px", res = 300)
print(ggplot(TOP_KEGG, aes(NegLogP, Pathway, color = Pathway))+
geom_point(size = 3, alpha = 0.8, stroke = 1)+
theme_classic()+theme(legend.position = "none",
title = element_text(face = 2, size = 12)) +
ggtitle(paste(gsub("_"," ",cname), " - TOP KEGG PATHWAYS: ",gsub("_"," ",clusters[i]),sep="")))
dev.off()
}
}
save.image(paste(output_dir,"/",project_name,"_20210626.RData", sep = ""))
# #### Top markers: clusters
current_markers <- readRDS(paste(output_dir,"RDATA_RDS/FindAllMarkers_Result_RNA_Assay_Integrated_Data.RDS", sep = ""))
# current_markers <- NULL
# files <- list.files("OUTPUT/BEFORE_FINAL/ALL_SIG_GENES_TCELL_CLUSTERS/", full.names = T, pattern = "SIG_GENES_T_CELL_CLUSTER")
# for(i in 1:length(files)){
# current <- read.table(files[i], header=T)
# current_markers <- rbind(current_markers, current)
# }
current_markers <- current_markers[order(current_markers$avg_log2FC, decreasing = T),]
n <- 200
p_threshold <- 0.01
current_markers <- current_markers[which(current_markers$p_val_adj < p_threshold),]
current_dir <- paste(output_dir, "/TOP_",n,"_GENES_GO_TERMS_INTEGRATIVE_DATA_RNA_ASSAY_CLUSTERS/", sep = "")
dir.create(current_dir)
clusters <- unique(current_markers$cluster)
for(i in 1:length(clusters)){
current <- current_markers[which(current_markers$cluster == clusters[i]),]
current <- current[order(current$avg_log2FC, decreasing = T),]
current <- data.frame(SYMBOL = unique(current[,"gene"])[1:ifelse(nrow(current)<n, nrow(current),n)])
gene_col_provided <- "SYMBOL"
entrez_out <- select(org.Hs.eg.db, keys = current$SYMBOL, keytype = 'SYMBOL', columns = 'ENTREZID')
current$ENTREZID <- entrez_out[match(current$SYMBOL, entrez_out$SYMBOL),"ENTREZID"]
current <- current[which(!is.na(current$ENTREZID)),]
go <- goana(current$ENTREZID, species="Hs")
go <- go[go$P.DE < p_threshold,]
go <- go[order(go$P.DE, decreasing = F),]
write.csv(go, paste(current_dir,"/CLUSTER_",clusters[i],"_TOP_",n,"_GENES_GO_TERMS_P",p_threshold,".csv",sep = ""), quote = F, row.names = T)
n <- 50
TOP_GO <- topGO(go, ontology =c("BP","CC","MF"), n=n)
TOP_GO <- TOP_GO[order(TOP_GO$P.DE, decreasing = F),]
TOP_GO$Term <- factor(TOP_GO$Term, levels = rev(unique(TOP_GO$Term)))
TOP_GO$NegLogP <- -log(TOP_GO$P.DE)
write.csv(TOP_GO, paste(current_dir,"/CLUSTER_",clusters[i],"_TOP_",n,"_GENES_GO_BPCCMF_TERMS_P",p_threshold,".csv",sep = ""), quote = F, row.names = T)
somePNGPath <- paste(current_dir,"/CLUSTER_",clusters[i],"_TOP_",n,"_GENES_GO_BPCCMF_TERMS_P",p_threshold,".png",sep = "")
png(somePNGPath, width = 3500, height =2000, units = "px", res = 300)
print(ggplot(TOP_GO, aes(NegLogP, Term, color = Term))+
geom_point(size = 3, alpha = 0.8, stroke = 1)+
theme_classic()+theme(legend.position = "none",
title = element_text(face = 2, size = 12)) +
ylab("GO TERMS")+
ggtitle(paste("TOP GO TERMS: CLUSTER ",clusters[i],sep="")))
dev.off()
keg <- kegga(current$ENTREZID, species="Hs")
keg <- keg[keg$P.DE < p_threshold,]
keg <- keg[order(keg$P.DE, decreasing = F),]
write.csv(keg, paste(current_dir,"/CLUSTER_",clusters[i],"_TOP_",n,"_GENES_KEGG_PATHWAYS_P",p_threshold,".csv",sep = ""), quote = F, row.names = T)
n <- 50
TOP_KEGG <- topKEGG(keg, n=n)
write.csv(TOP_KEGG, paste(current_dir,"/CLUSTER_",clusters[i],"_TOP_",n,"_GENES_KEGG_PATHWAYS_P",p_threshold,".csv",sep = ""), quote = F, row.names = T)
TOP_KEGG <- TOP_KEGG[order(TOP_KEGG$P.DE, decreasing = F),]
TOP_KEGG$Pathway <- factor(TOP_KEGG$Pathway, levels = rev(unique(TOP_KEGG$Pathway)))
TOP_KEGG$NegLogP <- -log(TOP_KEGG$P.DE)
somePNGPath <- paste(current_dir,"/CLUSTER_",clusters[i],"_TOP_",n,"_GENES_KEGG_PATHWAYS_P",p_threshold,".png",sep = "")
png(somePNGPath, width = 3000, height =2000, units = "px", res = 300)
print(ggplot(TOP_KEGG, aes(NegLogP, Pathway, color = Pathway))+
geom_point(size = 3, alpha = 0.8, stroke = 1)+
theme_classic()+theme(legend.position = "none",
title = element_text(face = 2, size = 12)) +
ggtitle(paste("TOP KEGG PATHWAYS: CLUSTER ",clusters[i],sep="")))
dev.off()
}
# library("gplots")
# library("plot3D")
# library("org.Hs.eg.db")
# library("limma")
# somePNGPath = paste("OUTPUT/TOP_",n,"_GENES_TCRBCR_CELL_SUBTYPES.png", sep = "")
# png(somePNGPath, width=2000, height=n*220, units = "px", res = 150)
# heatmap.2(plot_median_CELL_SUBTYPE,margin=c(30, 15), trace="none",key=T, keysize=1,
# col=jet2.col (n = 100, alpha = 1),main = project_name,
# srtCol=45,
# scale="none", Colv = T, Rowv = T,
# density.info="none", cexCol=2,cexRow=2)
# dev.off()