-
Notifications
You must be signed in to change notification settings - Fork 0
/
summarise_and_plot_tensorqtl_results.R
executable file
·121 lines (104 loc) · 5.8 KB
/
summarise_and_plot_tensorqtl_results.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
library(data.table)
library(ggplot2)
library(qvalue)
tensorqtl_dir = "/share/ScratchGeneral/anncuo/OneK1K/saige_eqtl/output/tensorqtl/"
celltypes = c("B_IN","B_Mem","CD4_NC", "CD4_ET", "CD4_SOX4", "CD8_NC", "CD8_ET",
"CD8_S100B", "DC", "Mono_C", "Mono_NC", "NK_R", "NK", "Plasma")
df_list = list()
for (celltype in celltypes){
chrom_df = data.frame(celltype = celltype, chromosome = 1:22)
celltype_dir = paste0(tensorqtl_dir, celltype,"/")
for (chrom in 1:22){
# significant results (main analysis)
myfile = paste0(celltype_dir,celltype,".sig_cis_qtl_pairs.chr",chrom,".csv")
df = read.csv(myfile, sep="\t")
df$qv = qvalue(df$pval_beta, pi0=1)$qvalue
chrom_df[chrom_df$celltype == celltype & chrom_df$chromosome == chrom,"n_sig_genes"] = nrow(df[df$qv<0.05,])
# additional results (conditional analysis)
myfile2 = paste0(celltype_dir,celltype,".independent_cis_qtl_pairs.chr",chrom,".csv")
if (!file.exists(myfile2)){next}
df = read.csv(myfile2, sep="\t")
if (length(unique(df$rank)) == 1){
if (nrow(df) == 1){df$qv = df$pval_beta}
else{df$qv = qvalue(df$pval_beta, pi0=1)$qvalue}
chrom_df[chrom_df$celltype == celltype & chrom_df$chromosome == chrom,"n_genes_rank1"] = nrow(df[df$qv<0.05,])
}
if (length(unique(df$rank)) > 1){
for (rank in unique(df$rank)){
df_curr = df[df$rank == rank,]
if (nrow(df_curr) == 1){
df_curr$qv = df_curr$pval_beta
}
else{df_curr$qv = qvalue(df_curr$pval_beta, pi0=1)$qvalue}
chrom_df[chrom_df$celltype == celltype & chrom_df$chromosome == chrom,paste0("n_genes_rank",rank)] = nrow(df_curr[df_curr$qv<0.05,])
}
}
}
df_list[[celltype]] = chrom_df
}
summary_df = rbindlist(df_list,fill = TRUE)
for (celltype in celltypes){
ct_df = data.frame()
celltype_dir = paste0(tensorqtl_dir, celltype,"/")
for (chrom in 1:22){
# significant results (main analysis)
myfile = paste0(celltype_dir,celltype,".sig_cis_qtl_pairs.chr",chrom,".csv")
df = read.csv(myfile, sep="\t")
df$qv = qvalue(df$pval_beta, pi0=1)$qvalue
chrom_df = data.frame(egene = df[df$qv<0.1,"phenotype_id"], fdr = df[df$qv<0.1,"qv"])
ct_df = rbind(ct_df, chrom_df)
}
write.csv(ct_df, paste0("/share/ScratchGeneral/anncuo/OneK1K/saige_eqtl/tensorqtl_egenes/",celltype,".csv"))
}
# plot number of eGenes (barplot)
celltype_summary = summary_df[, sum(n_sig_genes),by=list(celltype)]
# reoder cell types
celltype_summary$celltype <- factor(celltype_summary$celltype,
levels = c("CD4_NC","CD4_ET","CD4_SOX4","CD8_ET","CD8_NC","CD8_S100B","NK",
"NK_R","Plasma","B_Mem","B_IN","Mono_C","Mono_NC","DC"))
# assign original colours to cell types
df_colours = data.frame(colours = c("#882E72","#B178A6","#D6C1DE","#1965B0","#5289C7","#7BAFDE","#4EB265",
"#90C987","#CAE0AB","#F7EE55","#F6C141","#F1932D","#E8601C","#DC050C"),
celltype = c("CD4_NC","CD4_ET","CD4_SOX4","CD8_ET","CD8_NC","CD8_S100B","NK","NK_R",
"Plasma","B_Mem","B_IN","Mono_C","Mono_NC","DC"))
options(repr.plot.width = 12, repr.plot.height = 8)
p = ggplot(as.data.frame(celltype_summary), aes(x=celltype, y=V1, fill=celltype)) + geom_bar(stat = "identity")
p = p + xlab("") + ylab("Number of genes at FDR<5%") + ylim(c(0,4200))
p = p + scale_fill_manual(values = df_colours$colours) + theme_classic()
# need to save this
p = p + theme(text = element_text(size=20)) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
print(p)
# plot number of eGenes by number of cells (scatter)
meta = read.csv("/share/ScratchGeneral/anncuo/OneK1K/metadata.csv")
ncells_df = data.frame(celltype_old = unique(meta$cell_type))
for (celltype in unique(meta$cell_type)){
ncells_df[ncells_df$celltype_old == celltype, "n_cells"] = nrow(meta[meta$cell_type == celltype,])
}
# update cell names
celltype <- c("CD4_NC", "CD4_ET", "CD4_SOX4",
"CD8_ET", "CD8_NC", "CD8_S100B",
"NK", "NK_R",
"Plasma", "B_Mem", "B_IN",
"Mono_C", "Mono_NC", "DC")
celltype_old <- c("CD4+ KLRB1- T cell", "CD4+ KLRB1+ T cell", "CD4+ SOX4+ T cell",
"CD8+ GNLY+ NKG7+ T cell", "CD8+ LTB+ T cell", "CD8+ S100B+ T cell",
"XCL1- NK","XCL1+ NK",
"IgJ+ B cell", "TCL1A- FCER2- B cell", "TCL1A+ FCER2+ B cell",
"Monocyte CD14+", "Monocyte FCGR3A+", "Dendritic cell")
cells <- data.frame(celltype, celltype_old)
# combine info
ncells_df2 = as.data.frame(data.table(cells)[data.table(ncells_df), on="celltype_old"])
ncells_df3 = as.data.frame(data.table(celltype_summary)[data.table(ncells_df2), on="celltype"])
# exclude non-PBMCs
ncells_df3 = ncells_df3[!(ncells_df3$celltype_old %in% c("Erythrocytes","Platelets")),]
ncells_df3$n_sig_genes = ncells_df3$V1
ncells_df3$celltype <- factor(ncells_df3$celltype,
levels = c("CD4_NC","CD4_ET","CD4_SOX4","CD8_ET","CD8_NC","CD8_S100B","NK",
"NK_R","Plasma","B_Mem","B_IN","Mono_C","Mono_NC","DC"))
cor = round(cor(ncells_df3$n_cells, ncells_df3$n_sig_genes), digits=2)
options(repr.plot.width = 10, repr.plot.height = 8)
p = ggplot(ncells_df3, aes(x=log10(n_cells), y=n_sig_genes, colour=celltype)) + geom_point(size=10)
p = p + scale_colour_manual(values = df_colours$colours) + theme_classic()
p = p + xlab("Number of cells (log10)") + ylab("Number of significant eGenes (FDR<5%)")
p = p + theme(text = element_text(size=20)) + ggtitle(paste0("TensorQTL, cor=",cor))
print(p)