-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare_effect_type.R
151 lines (116 loc) · 8.46 KB
/
prepare_effect_type.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#Emine Ozsahin
# July 2020
library(qtl2)
library(stringr)
args <- commandArgs(trailingOnly = TRUE)
filename <- args[1]
#Example file name: QTL.qqnorm_chr1.RData
load(filename)
peaks <- peaks[order(peaks[,5],decreasing = TRUE),]
cat("finding qtl effects...")
qtl_effects <- data.frame()
for (i in 1:dim(peaks)[1]) { g <- as.data.frame(maxmarg(pr, map, chr=peaks$chr[i], pos=peaks$pos[i], return_char=TRUE))
colnames(g) <- "genotype"
phenotype <- wheat$pheno[,peaks$lodcolumn[i]]
g <- cbind(g, phenotype)
g <- g[!is.na(g$genotype),]
A <- g[g == "AA", ]
B <- g[g == "BB", ]
Amean <- mean(A$phenotype)
Bmean <- mean(B$phenotype)
Alen <- length(A$genotype)
Blen <- length(B$genotype)
e <- if(Amean == "NaN" | Bmean == "NaN") "NA" else if(Amean > Bmean) "AA" else "BB"
temp_gen <- data.frame(intron=peaks$lodcolumn[i], AA=Alen, BB=Blen, A.pheno.mean=Amean, B.pheno.mean=Bmean, effect=e)
qtl_effects <- rbind(qtl_effects, temp_gen)}
cat("qtl effects are generated..", "\n")
map <- read.csv("SNP-location2.csv")
map$Chr <- gsub("chr", "", map$Chr)
#finding the qtl type
cat("finding qtl positions on phsical map...", "\n")
peaks <- cbind(peaks, unlist(str_split_fixed(peaks$lodcolumn, ":",4)))
peaks$`1` <- ifelse(peaks$`1`== "21", "7D",
ifelse(peaks$`1`== "20", "6D",
ifelse(peaks$`1`== "19", "5D",
ifelse(peaks$`1`== "18", "4D",
ifelse(peaks$`1`== "17", "3D",
ifelse(peaks$`1`== "16", "2D",
ifelse(peaks$`1`== "15", "1D",
ifelse(peaks$`1`== "14", "7B",
ifelse(peaks$`1`== "13", "6B",
ifelse(peaks$`1`== "12", "5B",
ifelse(peaks$`1`== "11", "4B",
ifelse(peaks$`1`== "10", "3B",
ifelse(peaks$`1`== "9", "2B",
ifelse(peaks$`1`== "8", "1B",
ifelse(peaks$`1`== "7", "7A",
ifelse(peaks$`1`== "6", "6A",
ifelse(peaks$`1`== "5", "5A",
ifelse(peaks$`1`== "4", "4A",
ifelse(peaks$`1`== "3", "3A",
ifelse(peaks$`1`== "2", "2A",
ifelse(peaks$`1`== "1", "1A",
ifelse(peaks$`1`== "22", "Un", NA))))))))))))))))))))))
colnames(peaks)[8:11] <- c("intron_chr", "intron_start", "intron_end", "cluster")
qtl_pos_on_physical_map <- data.frame(stringsAsFactors = FALSE)
for (i in 1:length(peaks$chr)) {
pos <- map[map$Marker.position..CM..on.genetic.map > peaks$ci_lo[i] & map$Marker.position..CM..on.genetic.map < peaks$ci_hi[i] & map$Chr == peaks$chr[i],]
pos <- pos[order(pos$Marker.position.on.physical.map),]
pos <- pos[(!is.na(pos$Marker.position..CM..on.genetic.map)),]
temp <- data.frame(qtl_pos_on_physical_map=paste0(pos$Marker.position.on.physical.map[1], ":", tail(pos, n=1)$Marker.position.on.physical.map))
qtl_pos_on_physical_map <- rbind(qtl_pos_on_physical_map, temp)
}
f <- cbind(qtl_effects, peaks[,8:11], qtl_pos_on_physical_map, peaks$chr)
f <- cbind(f, unlist(str_split_fixed(f$qtl_pos_on_physical_map, ":",2)))
colnames(f)[12:14] <- c("qtl_chr", "qtl_start", "qtl_end")
as.num = function(x, na.strings = "NA") {
stopifnot(is.character(x))
na = x %in% na.strings
x[na] = 0
x = as.numeric(x)
x[na] = NA_real_
x
}
f$intron_start <- as.numeric(as.character(f$intron_start))
f$intron_end <- as.numeric(as.character(f$intron_end))
f$qtl_start <- as.num(as.character(f$qtl_start, na.strings = "NA"))
f$qtl_start <- f$qtl_start-1000000
f$qtl_end <- as.num(as.character(f$qtl_end, na.strings = "NA"))
f$qtl_end <- f$qtl_end+1000000
f$t <- 1:length(f$intron)
for (i in 1:length(f$intron)) { f$t[i] <- if(!is.na(f$qtl_end[i]) & !is.na(f$qtl_end[i]) & f$qtl_chr[i] == f$intron_chr[i] & f$intron_start[i] > f$qtl_start[i] & f$intron_start[i] < f$qtl_end[i] & f$intron_end[i] > f$qtl_start[i] & f$intron_end[i] < f$qtl_end[i] ) "cis" else "trans" }
f$t <- as.character(f$t)
f$qtl_type <- f$t
f$qtl_type <- as.character(f$qtl_type)
for (i in 1:length(f$t)) {f$qtl_type[i] <- if(is.na(f$qtl_end[i])) f$qtl_end[i] else f$t[i]}
f <- f[,-which(colnames(f) == "t")]
f <- cbind(f, peaks$lod)
colnames(f)[which(colnames(f) =="peaks$lod")] <- "lod"
filename <- gsub("QTL.qqnorm_", "", filename)
filename <- gsub(".RData", "", filename)
filename <- gsub("chr", "", filename)
filename <- ifelse(filename == "21", "7D",
ifelse(filename == "20", "6D",
ifelse(filename == "19", "5D",
ifelse(filename == "18", "4D",
ifelse(filename == "17", "3D",
ifelse(filename == "16", "2D",
ifelse(filename == "15", "1D",
ifelse(filename == "14", "7B",
ifelse(filename == "13", "6B",
ifelse(filename == "12", "5B",
ifelse(filename == "11", "4B",
ifelse(filename == "10", "3B",
ifelse(filename == "9", "2B",
ifelse(filename == "8", "1B",
ifelse(filename == "7", "7A",
ifelse(filename == "6", "6A",
ifelse(filename == "5", "5A",
ifelse(filename == "4", "4A",
ifelse(filename == "3", "3A",
ifelse(filename == "2", "2A",
ifelse(filename == "1", "1A",
ifelse(filename == "22", "Un", NA))))))))))))))))))))))
write.csv(f[,-c(10,11)], paste0("chr", filename, "_qtl_effects.csv"))
#save(c2eff, file = paste0(filename, "c2eff.RData"))
#save.image()