forked from Data4Democracy/house_expenditures
-
Notifications
You must be signed in to change notification settings - Fork 0
/
explore_house_exp.Rmd
123 lines (103 loc) · 3.8 KB
/
explore_house_exp.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
---
title: "Exploring House Expenditures"
output:
html_document: default
html_notebook: default
---
```{r load packages}
library(tidyverse, quietly = T)
library(stringr, quietly = T)
```
### Loading Data
```{r load data}
# Set up your working directory
#setwd("~/Documents/github/house_expenditures")
# Download and read Q3 detail files from data.world
file_downloads <- c("https://query.data.world/s/5o0xnjfh6iuo0kh62b1rzp8xf", #2016
"https://query.data.world/s/a86tqnnfe16f03x6wuxk1z0g7", #2015
"https://query.data.world/s/7gm9yyoh8zjffcdnlpboc12sd", #2014
"https://query.data.world/s/3typrwb3pwd32jiqnusc5nfiw", #2013
"https://query.data.world/s/9kfxw5pfebclr3sjmqppwy4at", #2012
"https://query.data.world/s/3fe96e3unm3pr0hw6ao9pynnd", #2011
"https://query.data.world/s/ak20z1hk153nyrghsejfuazjz", #2010
"https://query.data.world/s/30807xh494jhuk28slw6jhi1f" #2009
)
alldata <- lapply(file_downloads, function(x) {
read_csv(x, col_types = cols(
`START DATE` = col_date(format = "%m/%d/%y"),
`END DATE` = col_date(format = "%m/%d/%y")
))
})
df <- bind_rows(alldata) # Combine into one dataset
```
### Cleaning Data
I removed the commas in the AMOUNT column and changed the START.DATE and END.DATE
columns to the date class. Dollar amounts were updated to reflect 2016 dollars as
found on [http://www.usinflationcalculator.com/](http://www.usinflationcalculator.com/).
```{r clean data}
# Convert to 2016 dollars
# a = dollar amoung
# r = inflation rate found on http://www.usinflationcalculator.com/
convertDollars <- function(a, r) {
round(a + (a * r), 2)
}
house_expenditures_clean <- df %>%
mutate(`2016_dollars` = as.numeric("")) %>%
mutate(`2016_dollars` = if_else(
QUARTER == "2009Q3", convertDollars(AMOUNT, .125), `2016_dollars`)
) %>%
mutate(`2016_dollars` = if_else(
QUARTER == "2010Q3", convertDollars(AMOUNT, .107), `2016_dollars`)
) %>%
mutate(`2016_dollars` = if_else(
QUARTER == "2011Q3", convertDollars(AMOUNT, .073), `2016_dollars`)
) %>%
mutate(`2016_dollars` = if_else(
QUARTER == "2012Q3", convertDollars(AMOUNT, .051), `2016_dollars`)
) %>%
mutate(`2016_dollars` = if_else(
QUARTER == "2013Q3", convertDollars(AMOUNT, .036), `2016_dollars`)
) %>%
mutate(`2016_dollars` = if_else(
QUARTER == "2014Q3", convertDollars(AMOUNT, .02), `2016_dollars`)
) %>%
mutate(`2016_dollars` = if_else(
QUARTER == "2015Q3", convertDollars(AMOUNT, .018), `2016_dollars`)
) %>%
mutate(`2016_dollars` = if_else(
QUARTER == "2016Q3", AMOUNT, `2016_dollars`)
) %>%
select(BIOGUIDE_ID:AMOUNT, `2016_dollars`, everything())
```
### Functions
```{r load functions}
# What is the total yearly expenditure for an office during the Q3 measurement?
by_office <- house_expenditures_clean %>%
group_by(OFFICE, QUARTER) %>%
summarise("yearly_exp" = sum(`2016_dollars`))
officeYearly <- function(office) {
by_office[by_office$OFFICE == office, ]
}
# What is the total yearly expenditure paid to a payee during the Q3 measurement?
by_payee <- house_expenditures_clean %>%
group_by(PAYEE, QUARTER) %>%
summarise("yearly_exp" = sum(`2016_dollars`))
payeeYearly <- function(payee) {
by_payee[by_payee$PAYEE == payee, ]
}
```
<!-- ### Example -->
<!-- What are the historical third quarter totals for ADMIN AND OPS in 2016 dollars? -->
<!-- ```{r} -->
<!-- officeYearly("ADMIN AND OPS") -->
<!-- ``` -->
```{r write CSVs}
# Check for output folder and create it if needed.
if (file.exists("output") == FALSE) {
dir.create("output")
}
# Write CSVs
write_csv(house_expenditures_clean, "output/house_expenditures_clean.csv")
write_csv(by_office, "output/by_office.csv")
write_csv(by_payee, "output/by_payee.csv")
```