-
Notifications
You must be signed in to change notification settings - Fork 3
/
exploration.Rmd
126 lines (95 loc) · 3.59 KB
/
exploration.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
---
title: "Data set exploration"
author: "Javier Castillo-Arnemann"
date: "October 12, 2018"
output: github_document
---
```{r setup, include=FALSE}
# Show each chunk's source code alongside its output in the knitted document
knitr::opts_chunk$set(echo = TRUE)
```
## Exploratory data analysis
```{r}
suppressPackageStartupMessages(library(tidyverse))
library(cowplot)
# Load the master table. The file is tab-delimited, so only the double quote
# is treated as a quoting character and "#" is not treated as a comment
# marker. read.table()'s defaults (quote = "\"'", comment.char = "#") would
# otherwise merge or truncate any field that contains an apostrophe or a hash.
dataset <- read.table(
  "data/df_MASTERTABLE.txt",
  sep = "\t",
  header = TRUE,
  quote = "\"",
  comment.char = ""
)

# Fix the order of the depth levels for plotting; any DEPTH value that is not
# in this list becomes NA
dataset$DEPTH <- factor(
  dataset$DEPTH,
  levels = c("MULTI", "MES", "DCM", "MIX", "SRF")
)

# Total number of distinct samples
n_distinct(dataset$SAMPLE)
# Depth vs region: one point per row, with the y axis reversed so that larger
# DEPTHM values are drawn lower on the plot.
# DEPTHM goes through as.character() before as.integer(): applied directly to
# a factor (which is what read.table() produces for a non-numeric column when
# stringsAsFactors is TRUE), as.integer() returns the internal level codes
# instead of the recorded values. Entries that cannot be parsed as numbers
# become NA (with a warning) and are left out of the plot.
dataset %>%
  ggplot(aes(GEOREGION, as.integer(as.character(DEPTHM)))) +
  geom_point() +
  labs(x = "GEOREGION", y = "DEPTHM") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_y_reverse()
# Build a single location key by joining LAT and LONG with ", "
dataset <- dataset %>%
  mutate(LOC = paste(LAT, LONG, sep = ", "))

# Count the distinct samples recorded at each location
summarize(group_by(dataset, LOC), samples_per_loc = n_distinct(SAMPLE))
# Keep the rows of a single sample ("cenf") for ad-hoc inspection
cenf <- dataset %>%
  filter(SAMPLE == "cenf")

# Depth columns of the rows flagged as OMZ, to check which depths they cover
depths <- dataset %>%
  filter(DEPTH_OMZ == "OMZ") %>%
  select(DEPTHM, DEPTH_OMZ, DEPTH)
# Depth heatmaps: pathway category (x) against DEPTH (y), coloured by
# log(RPKM).
# Rows with RPKM equal to 0 are removed before taking the log, as is already
# done for the two-sample comparison further down: log(0) is -Inf, which lies
# outside the colour gradient and would be drawn in the grey NA colour.
# NOTE(review): rows are filtered on DEPTH_OMZ but plotted by DEPTH - confirm
# that DEPTH_OMZ is the intended column.
# NOTE(review): if several rows share one pathway/depth cell their tiles are
# drawn on top of each other, so only the last one is visible; consider
# aggregating RPKM per cell first.
depth_rows <- dataset %>%
  filter(DEPTH_OMZ != "MULTI", RPKM != 0)

# Layers shared by both heatmaps
depth_heat_layers <- list(
  geom_tile(aes(fill = log(RPKM)), colour = "white"),
  scale_fill_gradient(low = "white", high = "darkblue"),
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
)

# LEVEL1 categories
depth_rows %>%
  ggplot(aes(LEVEL1, DEPTH)) +
  depth_heat_layers +
  labs(x = "Pathway type", y = "Ocean Layer")

# LEVEL2 categories (default axis titles)
depth_rows %>%
  ggplot(aes(LEVEL2, DEPTH)) +
  depth_heat_layers
# Compare the pathways found in two samples.
# Each sample's rows are tagged with a one-level factor id that later serves
# as the plotting group.
sample1 <- mutate(filter(dataset, SAMPLE == "cenf"), sample_id = as.factor(1))
sample2 <- mutate(
  filter(dataset, SAMPLE == "ERR599337"),
  sample_id = as.factor(2)
)

# Stack both samples, drop the rows whose RPKM is 0, sort by increasing RPKM
samples <- arrange(filter(rbind(sample1, sample2), RPKM != 0), RPKM)

# Pathway names present in both samples
intersect(sample1$PWY_NAME, sample2$PWY_NAME)

# LEVEL2 categories present in both samples
intersect(sample1$LEVEL2, sample2$LEVEL2)

# LEVEL1 categories present in both samples
intersect(sample1$LEVEL1, sample2$LEVEL1)
# Pathway comparison heatmaps for the two selected samples, one per
# annotation level (LEVEL1, LEVEL2, LEVEL3).
# Set the session-wide default theme to cowplot's with a smaller font size;
# each plot below also adds theme_classic() explicitly.
theme_set(theme_cowplot(font_size = 12))

# Layers common to the three heatmaps: white-bordered tiles filled by
# log(RPKM), a white-to-steelblue gradient, axis titles and the classic theme
comparison_layers <- list(
  geom_tile(aes(fill = log(RPKM)), colour = "white"),
  scale_fill_gradient(low = "white", high = "steelblue"),
  labs(x = "Pathway", y = "Group"),
  theme_classic()
)

# In every plot the x categories are ordered with
# fct_reorder(..., RPKM, .desc = TRUE); only the x-axis text size differs
# (default, 10 and 5).
level1_p <- ggplot(
  samples,
  aes(fct_reorder(LEVEL1, RPKM, .desc = TRUE), as.character(sample_id))
) +
  comparison_layers +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

level2_p <- ggplot(
  samples,
  aes(fct_reorder(LEVEL2, RPKM, .desc = TRUE), as.character(sample_id))
) +
  comparison_layers +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))

level3_p <- ggplot(
  samples,
  aes(fct_reorder(LEVEL3, RPKM, .desc = TRUE), as.character(sample_id))
) +
  comparison_layers +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5))

# Render the three heatmaps in order
level1_p
level2_p
level3_p
```