-
Notifications
You must be signed in to change notification settings - Fork 0
/
thesis_notebook.Rmd
388 lines (283 loc) · 13 KB
/
thesis_notebook.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
---
title: "Stevie's Senior Thesis"
output: html_notebook
---
Load libraries
```{r}
setwd("~/senior_thesis/)
source('libraries.R')
source('get_nc.R')
```
Download data files
```{r}
model.list = c("HadGEM3-GC31-LL","EC-Earth3", "MRI-ESM2-0", "NorESM2-LM", "NorESM2-MM", "CMCC-CM2-SR5", "AWI-CM-1-1-MR")
lapply(model.list, get_nc)
#system("az vm deallocate --name atlantisserver01 --no-wait --resource-group morzariacedogroup")
#get_nc(model.name = "AWI-CM-1-1-MR")
```
Using epwshiftr to specify which files to download
```{r}
# NOTE: epwshiftr package does not work with this version of R (3.6.3), so I had to run this section in a different server
# set directory to store files
options(epwshiftr.dir = tempdir())
options(epwshiftr.verbose = TRUE)
# get CMIP6 data nodes
(nodes <- get_data_node())
# create a CMIP6 output file index
idx <- init_cmip6_index(
# only consider ScenarioMIP activity
activity = "ScenarioMIP",
# specify variables POC, NPP, and MLD
variable = c("expc", "epc100", "pp", "ppos", "mlotst", "mlotstmax", "mlotstmin"),
# specify report frequency
frequency = c("yr", "mon"),
# specify experiment name
experiment = c("ssp585"),
# specify GCM names
source = c("CESM2", "CESM2-WACCM", "GFDL-ESM4", "MPI-ESM1-2-HR",
"MPI-ESM1-2-LR", "IPSL-CM6A-LR", "UKESM1-0-L"),
# specify variant,
variant = NULL,
# save to data dictionary
save = TRUE
)
#saving CMIP6 data list
write.csv(idx,"~/walker_thesis/download_indexes/cmip6_index.csv", row.names = TRUE)
cmip6.index <- read_csv("~/senior_thesis/download_indexes/cmip6_index.csv")
```
Download data files
```{r}
source('get_nc_thredds.R')
#creating save path
save.path <- "~/senior_thesis/cmip6_data"
dir.create(save.path)
#first attempt (got CESM2, CESM2-WACCM, MPI-ESM1-2-HR)
cmip6.index <- read_csv("~/senior_thesis/download_indexes/cmip6_index.csv")
get_nc_thredds(cmip6.index, save.path)
#GFDL download (only MLD)
cmip6.index <- read_csv("~/senior_thesis/download_indexes/cmip6_index_gfdl.csv")
get_nc_thredds(cmip6.index, save.path)
#GFDL download - manually pasted file urls from ESGF search query, failed to connect to node and could not download
cmip6.index <- read_csv("~/senior_thesis/download_indexes/gfdl_urls.csv")
get_nc_thredds(cmip6.index, save.path)
#IPSL download - failed because it could not connect to vesg.ipsl.upmc.fr data node
cmip6.index <- read_csv("~/senior_thesis/download_indexes/cmip6_index_ipsl.csv")
get_nc_thredds(cmip6.index, save.path)
#UKESM download - manually pasted file urls from ESGF search query
cmip6.index <- read_csv("~/senior_thesis/download_indexes/ukesm_urls.csv")
get_nc_thredds(cmip6.index, save.path)
```
Combine multipart files into a single nc file
```{r}
# list of all files with gr: "data reported on a model's regular grid"
model.files <- list.files("~/senior_thesis/CESM_data/", pattern = "*_gn_*.*nc$")
# list of unique models and scenarios
model.scenarios <- list.files("~/senior_thesis/CESM_data/", pattern = "*_gn_*.*nc$") %>%
str_split(.,"_gn") %>%
unlist %>%
#switch between variables and model reps here. You can do this manually here, then rerun. ( Note on 2/8/21)
grep("mlotstmax_Omon_CESM2_ssp585_r10i1p1f1",., value = TRUE) %>%
unique()
data.path <- "~/senior_thesis/CESM_data/"
save.path <- "~/senior_thesis/combined_CESM_files"
dir.create(save.path)
lapply(model.scenarios,combine_files, model.files, save.path, data.path)
```
Get metadata for combined files NOTE: I am working on a function to do this for any file, but it isn't done yet
```{r}
model.files <- list.files("~/senior_thesis/combined_CESM_files/")
data.path <- "~/senior_thesis/combined_CESM_files"
save.path <- "~/senior_thesis/ncfile_metadata"
dir.create(save.path)
lapply(model.files,grab_metadata, save.path, data.path)
# # POC flux at 100m ---------------
setwd("~/senior_thesis/combined_CESM_files")
nc_data <- nc_open('epc100_Omon_CESM2_ssp585_r10i1p1f1_gn_201501-210012.nc')
# Save the metadata to a text file
sink('epc100_Omon_CESM2_ssp585_r10i1p1f1_gn_201501-210012.txt')
print(nc_data)
setwd("~/senior_thesis/ncfile_metadata/")
sink()
#change back to current working directory
setwd("~/senior_thesis/combined_CESM_files")
nc_close(nc_data)
# # MLD max -------------------
setwd("~/senior_thesis/combined_CESM_files")
nc_data <- nc_open('mlotst_Omon_CESM2_ssp585_r10i1p1f1_gn_201501-210012.nc')
# Save the metadata to a text file
sink('mlotst_Omon_CESM2_ssp585_r10i1p1f1_gn_201501-210012.txt')
print(nc_data)
setwd("~/senior_thesis/ncfile_metadata/")
sink()
#change back to current working directory
setwd("~/senior_thesis/combined_CESM_files")
nc_close(nc_data)
```
Calculate short-term (2014-2034) and long-term (2079-2099) average downward flux of POC at 100m
units: (mol m-2 y-1)
```{r}
setwd("~/senior_thesis/")
source('calc_epc100_avg.R')
```
Plot 3 panel figure showing short-term (2014-2034), long-term (2079-2099), and difference in average downward flux of POC at 100m
```{r}
setwd("~/senior_thesis/")
source('plot_epc100_avg.R')
```
Calculate maximum annual MLD in the short-term (2014-2034) and long-term (2079-2099) units: m
```{r}
setwd("~/senior_thesis/")
source('calc_MLD_max.R')
```
Plot 3 panel figure showing short-term (2014-2034), long-term (2079-2099), and difference in average maximum annual MLD
```{r}
setwd("~/senior_thesis/")
source('plot_MLD_max.R')
```
Plot depth profiles in various locations
```{r}
setwd("~/senior_thesis/combined_CESM_files/")
nc_data <- nc_open('expc_Oyr_CESM2_ssp585_r10i1p1f1_gn_2015-2100.nc')
#NOTE: this section won't run in the notebook, so paste these lines into a script and run first before sourcing function
#get expc in 2014-2034
variable.st <- ncvar_get(nc_data,"expc",start= c(1,1,1,1), count = c(-1,-1,-1,20))
#get expc in 2079-2099
variable.lt <- ncvar_get(nc_data,"expc",start= c(1,1,1,66), count = c(-1,-1,-1,20))
#calcuates short-term average POC flux over 20 years, still including the entire water column averages
avg_expc_st <- apply(variable.st,c(1,2,3),mean,na.rm=TRUE)
#calcuates short-term average POC flux over 20 years, still including the entire water column averages
avg_expc_lt <- apply(variable.lt,c(1,2,3),mean,na.rm=TRUE)
#saves these arrays as .rds files so they are easier to load next time I need them
setwd("~/senior_thesis/plotting_dataframes/")
saveRDS(avg_expc_st, file = "avg_expc_st.Rds", ascii = TRUE)
saveRDS(avg_expc_lt, file = "avg_expc_lt.Rds", ascii = TRUE)
#end section
source('~/senior_thesis/depth_profiles.R')
#load short-term and long-term average POC flux arrays
avg_expc_st <- readRDS("~/senior_thesis/plotting_dataframes/avg_expc_st.Rds")
avg_expc_lt <- readRDS("~/senior_thesis/plotting_dataframes/avg_expc_lt.Rds")
# LABRADOR SEA
profile_create(grid.cell = c(305,355), depth.range = c(3000,0), expc.range = c(0,4),
title = "a) Labrador Sea",
data.path = "~/senior_thesis/combined_CESM_files/",
save.path = "~/senior_thesis/depth_profiles",
save.name = "labrador_sea_depth_profile")
# EQUATORIAL EASTERN PACIFIC
profile_create(grid.cell = c(274,184), depth.range = c(3200,0), expc.range = c(0,13),
title = "c) Equatorial Eastern Pacific",
data.path = "~/senior_thesis/combined_CESM_files/",
save.path = "~/senior_thesis/depth_profiles",
save.name = "equatorial_eastern_pacific_depth_profile")
# SOUTHERN OCEAN
profile_create(grid.cell = c(99,63), depth.range = c(3000,0), expc.range = c(0,4),
title = "e) Southern Ocean",
data.path = "~/senior_thesis/combined_CESM_files/",
save.path = "~/senior_thesis/depth_profiles",
save.name = "southern_ocean_depth_profile")
# SUBTROPICAL INDIAN OCEAN (not using in thesis)
profile_create(grid.cell = c(120,150), depth.range = c(5000,0), expc.range = c(0,4),
title = "Subtropical Indian Ocean",
data.path = "~/senior_thesis/combined_CESM_files/",
save.path = "~/senior_thesis/depth_profiles",
save.name = "subtropical_indian_ocean_depth_profile")
# ICELAND BASIN
profile_create(grid.cell = c(30,352), depth.range = c(1200,0), expc.range = c(0,5),
title = "b) Iceland Basin",
data.path = "~/senior_thesis/combined_CESM_files/",
save.path = "~/senior_thesis/depth_profiles",
save.name = "iceland_basin_depth_profile")
#SUBTROPICAL NORTH PACIFIC
profile_create(grid.cell = c(230,275), depth.range = c(5000,0), expc.range = c(0,1.5),
title = "d) Subtropical North Pacific",
data.path = "~/senior_thesis/combined_CESM_files/",
save.path = "~/senior_thesis/depth_profiles",
save.name = "subtropical_north_pacific_depth_profile")
# BELLINGHAUSEN SEA
profile_create(grid.cell = c(276,21), depth.range = c(3300,0), expc.range = c(0,2),
title = "f) Bellingshausen Sea",
data.path = "~/senior_thesis/combined_CESM_files/",
save.path = "~/senior_thesis/depth_profiles",
save.name = "bellinghausen_sea_depth_profile")
#arranging into 6 panel figure (NOTE: run this in a separate script, there's a glitch in readPNG)
setwd("~/senior_thesis/depth_profiles/")
A <- readPNG('labrador_sea_depth_profile.png')
B <- readPNG('iceland_basin_depth_profile.png')
C <- readPNG('equatorial_eastern_pacific_depth_profile.png')
D <- readPNG('subtropical_north_pacific_depth_profile.png')
E <- readPNG('southern_ocean_depth_profile.png')
f <- readPNG('bellinghausen_sea_depth_profile.png')
grid.combine <- grid.arrange(rasterGrob(A),rasterGrob(B),rasterGrob(C),
rasterGrob(D),rasterGrob(E),rasterGrob(f),ncol=2, nrow=3)
ggsave(grid.combine, filename = "faceted_depth_profiles.png", device = "png", path = "~/senior_thesis/depth_profiles/",
dpi = 500, width = 16, height = 22, units = "cm")
```
Organizing depth profile output for table info
```{r}
#combine table output csv files into one file
files <- list.files("~/senior_thesis/depth_profiles/", pattern = "*.csv$")
setwd("~/senior_thesis/depth_profiles/")
read.files <- lapply(files,read_csv)
binded_file <- rbindlist(read.files, fill = TRUE)
write.csv(binded_file, "table_info.csv")
```
Calculate global POC flux at 100m
```{r}
#downloading CESM cell area files
save.path <- "~/senior_thesis/cmip6_data"
cmip6.index <- "http://esgf-data.ucar.edu/thredds/fileServer/esg_dataroot/CMIP6/ScenarioMIP/NCAR/CESM2/ssp585/r10i1p1f1/Ofx/areacello/gn/v20200528/areacello_Ofx_CESM2_ssp585_r10i1p1f1_gn.nc"
get_nc_thredds(cmip6.index, save.path)
setwd("~/senior_thesis/cmip6_data")
nc_data <- nc_open('areacello_Ofx_CESM2_ssp585_r10i1p1f1_gn.nc')
#get file metadata
sink('areacello_Ofx_CESM2_ssp585_r10i1p1f1_gn.txt')
print(nc_data)
setwd(save.path)
sink()
#get longitude and latitude
lon <- ncvar_get(nc_data, "nlon")
lat <- ncvar_get(nc_data, "nlat")
#get area
area <- ncvar_get(nc_data, "areacello")
#read in global POC flux at 100m data
epc100_avg_st <- readRDS("~/senior_thesis/plotting_dataframes/epc100_avg_st.Rds")
epc100_avg_lt <- readRDS("~/senior_thesis/plotting_dataframes/epc100_avg_lt.Rds")
global_flux_st <- epc100_avg_st*area
sum_st <- sum(global_flux_st, na.rm = TRUE)
#SHORT TERM total global POC flux in Pt C / yr = 7.12
sum_st <- sum_st*12.01/1000000000000000
global_flux_lt <- epc100_avg_lt*area
sum_lt <- sum(global_flux_lt, na.rm = TRUE)
#LONG TERM total global POC flux in Pt C / yr = 6.81
sum_lt <- sum_lt*12.01/1000000000000000
percent_change <- (sum_st-sum_lt)/sum_st*100
```
Formatting data for plotting POC flux transects
```{r}
#Atlantic Transect
calc_transect(lon = 10, save.name.st = "lon10_transect_st", save.name.lt = "lon10_transect_lt")
#Pacific Transect
calc_transect(lon = 205, save.name.st = "lon205_transect_st", save.name.lt = "lon205_transect_lt")
```
Plotting POC flux transects
```{r}
#loading objects
atlantic_st <- readRDS("~/senior_thesis/plotting_dataframes/lon10_transect_st.Rds")
atlantic_lt <- readRDS("~/senior_thesis/plotting_dataframes/lon10_transect_lt.Rds")
pacific_st <- readRDS("~/senior_thesis/plotting_dataframes/lon205_transect_st.Rds")
pacific_lt <- readRDS("~/senior_thesis/plotting_dataframes/lon205_transect_lt.Rds")
mean_MLD_max_2079_2099 <- readRDS("~/senior_thesis/plotting_dataframes/mean_MLD_max_2079_2099.Rds")
mean_MLD_max_2014_2033 <- readRDS("~/senior_thesis/plotting_dataframes/mean_MLD_max_2014_2033.Rds")
#I haven't fixed the title aspect of the plot_transect function, so be sure to change this manually before generating figures
#3 panel atlantic transect, full depth and zoomed in
plot_transect(lon = 10,
save.name = "Atlantic_transect.png",
save.name.zoom = "Atlantic_transect_1000m.png",
transect.st = atlantic_st,
transect.lt = atlantic_lt)
#3 panel pacific transect, full depth and zoomed in
plot_transect(lon = 205,
save.name = "Pacific_transect.png",
save.name.zoom = "Pacific_transect_1000m.png",
transect.st = pacific_st,
transect.lt = pacific_lt)
```