-
Notifications
You must be signed in to change notification settings - Fork 0
/
STAT5421 HW3.Rmd
167 lines (141 loc) · 4.17 KB
/
STAT5421 HW3.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
---
title: "STAT5421_HW3"
author: "Ruwiada Al Harrasi"
date: "`r Sys.Date()`"
output:
html_document: default
pdf_document: default
---
### 3.1

#### (a)
```{r}
# Load the horseshoe crab data (Agresti, Table 4.3) and convert the
# categorical predictors to factors so that glm() builds dummy-variable
# contrasts for them instead of treating them as numeric.
set.seed(5421)
library(CatDataAnalysis)
data(table_4.3)
names(table_4.3)
sapply(table_4.3, class)
# Use <- for assignment (tidyverse / R style); = is reserved for arguments.
table_4.3$color <- as.factor(table_4.3$color)
table_4.3$spine <- as.factor(table_4.3$spine)
sapply(table_4.3, class)
```
```{r}
set.seed(5421)
# Test whether any of the terms in the model can be dropped, using the
# likelihood ratio (Wilks / LRT) test for every term.  drop1() reports
# one P-value per term; terms with large P-values are candidates to drop.
gout <- glm(satell ~ width + weight + color + spine,
            family = poisson, data = table_4.3)
summary(gout)
# BUG FIX: drop1() was assigned but never printed, so the per-term
# P-values that the exercise asks us to report never appeared in the
# rendered document.  Print the table explicitly.
gout3 <- drop1(gout, test = "LRT")
gout3
# Reduced Poisson regression suggested by the tests: weight + color only.
gout1 <- glm(satell ~ weight + color,
             family = poisson(link = "log"), data = table_4.3)
summary(gout1)
```
#### (b)
```{r}
set.seed(5421)
# Likelihood ratio test comparing the full model (gout) to the reduced
# model (gout1).  A large P-value means the reduced model fits the data
# about as well as the full model, so the simpler model is preferred.
anova(gout, gout1, test = "LRT")
```
The P-value is greater than 0.05, so we fail to reject the null hypothesis that the reduced model fits as well as the full model. We therefore prefer the simpler model (gout1). This agrees with the drop1 tests, which suggest that the only two significant predictors are weight and color.
### 3.2
```{r}
# BUG FIX: the original loaded MCM (an unrelated social-mobility package).
# The Metropolis sampler used below, metrop(), lives in the mcmc package,
# which is what the later chunks load with library("mcmc").
library(mcmc)
library(KernSmooth)
```
#### (a)
```{r}
set.seed(5421)
# Refit the reduced model without an intercept, keeping the design matrix
# (x = TRUE) and the response so the MCMC code below can reuse them.
# Note: spell out TRUE -- T is an ordinary variable that can be
# reassigned, so it should never be used for the logical constant.
gout1 <- glm(satell ~ 0 + weight + color,
             family = poisson, data = table_4.3, x = TRUE)
modmat <- gout1$x
response <- gout1$y
summary(gout1)
```
```{r}
set.seed(5421)
library("mcmc")
library("KernSmooth")
# Poisson regression log likelihood.
# BUG FIX: the original set y <- mean(gout1$y) and computed
# sum(y * beta - exp(beta)), which never touches the design matrix --
# the "likelihood" ignored the predictors entirely.  For Poisson
# regression with log link the log likelihood is
#   sum(y_i * eta_i - exp(eta_i)),  eta = modmat %*% beta
# (the log(y!) term is constant in beta and can be dropped).
logl <- function(beta) {
  stopifnot(is.numeric(beta))
  stopifnot(is.finite(beta))
  stopifnot(length(beta) == ncol(modmat))
  eta <- as.numeric(modmat %*% beta)
  sum(response * eta - exp(eta))
}
# Log unnormalized prior for the regression coefficients
# (same functional form as the original submission).
log.unnormalized.prior <- function(beta) {
  stopifnot(is.numeric(beta))
  stopifnot(is.finite(beta))
  stopifnot(length(beta) == ncol(modmat))
  sum(0.5 * beta) - sum(0.5 * exp(beta))
}
# Log unnormalized posterior = log likelihood + log prior.
log.unnormalized.posterior <- function(beta) {
  logl(beta) + log.unnormalized.prior(beta)
}
```
```{r}
set.seed(5421)
# Run Metropolis-Hastings sampling
# metrop() runs a random-walk Metropolis chain starting from the zero
# vector, one iteration per batch (blen = 1) for 1e6 batches.
mout <- metrop(log.unnormalized.posterior, rep(0, ncol(modmat)),
nbatch = 1000000,blen = 1)
mout$accept  # observed acceptance rate of the proposals
mout$time  # CPU time used by the run
colnames(mout$batch) <- names(gout1$coefficients)
foo <- as.ts(mout$batch)
colMeans(foo)  # Monte Carlo estimates of the posterior means
# Naive Monte Carlo standard errors.  NOTE(review): these ignore the
# autocorrelation of the chain, so they likely understate the true MCSE.
apply(foo, 2, sd) / sqrt(mout$nbatch)
```
With nbatch = 1000000 the running time is about 69 seconds, the acceptance rate is about 0.102, and the Monte Carlo standard errors are relatively low (around 0.01).
```{r}
set.seed(5421)
# Rescale the proposal: use the per-coordinate sample standard deviations
# from the previous run as the proposal scale, then continue the same
# chain (passing mout back to metrop() restarts from where it stopped).
foo <- sqrt(diag(var(mout$batch)))
mout <- metrop(mout, scale = foo)
mout$accept
mout$time
colnames(mout$batch) <- names(gout1$coefficients)
foo <- as.ts(mout$batch)
# Naive Monte Carlo standard errors of the posterior-mean estimates.
apply(foo, 2, sd) / sqrt(mout$nbatch)
```
```{r}
set.seed(5421)
# Continue the tuned chain for another 1e6 iterations.
mout <- metrop(mout, nbatch = 1000000)
mout$accept
# 95% confidence interval for the acceptance rate.
t.test(mout$accept.batch)$conf.int
colnames(mout$batch) <- names(gout1$coefficients)
foo <- as.ts(mout$batch)
# BUG FIX: mean(mout$batch) averaged every coefficient together into a
# single meaningless number.  colMeans() gives one posterior-mean
# estimate per coefficient, consistent with the earlier chunks.
colMeans(foo)
apply(foo, 2, sd) / sqrt(mout$nbatch)
mout$time
```
When nbatch is increased to 1,000,000 the running time becomes more than a minute.
#### (b)
```{r}
set.seed(5421)
# Retune the proposal scale from the latest run's per-coordinate sample
# standard deviations, and continue the chain with that scale.
foo <- sqrt(diag(var(mout$batch)))
mout <- metrop(mout, scale = foo)
mout$accept
mout$time
colnames(mout$batch) <- names(gout1$coefficients)
foo <- as.ts(mout$batch)
# Naive Monte Carlo standard errors of the posterior-mean estimates.
apply(foo, 2, sd) / sqrt(mout$nbatch)
```
When the proposal scale is set to sqrt(diag(var(mout$batch))), the Monte Carlo standard errors do not change much, but the acceptance rate becomes higher.
```{r}
set.seed(5421)
# Try a small fixed proposal scale instead of the variance-based one.
# NOTE(review): a tiny step size drives acceptance toward 1 but makes the
# chain move very slowly, so the naive SEs below may be over-optimistic.
mout <- metrop(mout, scale=.0002 )
mout$accept
# 95% confidence interval for the acceptance rate.
t.test(mout$accept.batch)$conf.int
colnames(mout$batch) <- names(gout1$coefficients)
foo <- as.ts(mout$batch)
apply(foo, 2, sd) / sqrt(mout$nbatch)
mout$time
```
Changing the scale to 0.0002 yields a very high acceptance rate (about 0.99) and Monte Carlo standard errors below 0.001. The running time does not change much, although it looks slightly higher than when using sqrt(diag(var(mout$batch))) as the scale. (Note that such a high acceptance rate indicates a very small step size, so the chain mixes slowly and the naive standard errors may be over-optimistic.)
#### (c)
```{r}
set.seed(5421)
# Fresh short run for density estimation of the "weight" coefficient.
mout <- metrop(log.unnormalized.posterior, rep(0, ncol(modmat)),
nbatch = 10000, blen = 1)
colnames(mout$batch) <- names(gout1$coefficients)
nu1 <- mout$batch[,"weight"]
# Subsample every 500th draw before choosing the bandwidth --
# presumably to thin out the autocorrelation of the MCMC draws before
# calling dpik(); TODO confirm this is the intended reason.
idx.sub <- seq(1, length(nu1), by = 500)
nu1.sub <- nu1[idx.sub]
bw <- dpik(nu1.sub)
# Kernel density estimate over the FULL chain, using that bandwidth.
den <- bkde(nu1, bandwidth = bw)
# Plot the estimated posterior density.  (The original comment claimed
# "x-axis limits from -2 to 2", but the code only fixes the y-axis to
# [0, 1] via ylim; no x-axis limits are set.)
plot(den, type = "l", xlab = "weight", ylab = "probability density",ylim = c(0, 1))
```
We can see that the estimated posterior density of the weight coefficient is approximately normal in shape, with a mean close to 1.