|
| 1 | +--- |
| 2 | +title: "Positionnement multidimensionnel (MDS)" |
| 3 | +author: "Guyliann Engels & Philippe Grosjean" |
| 4 | +output: |
| 5 | + learnr::tutorial |
| 6 | +tutorial: |
| 7 | + id: "sdd2.06b" |
| 8 | + version: 1.0.0 |
| 9 | +runtime: shiny_prerendered |
| 10 | +--- |
| 11 | + |
| 12 | +```{r setup, include=FALSE} |
| 13 | +# Packages ---- |
| 14 | +library(learnr) |
| 15 | +library(knitr) |
| 16 | +SciViews::R() |
| 17 | +library(BioDataScience) |
| 18 | +library(ade4) |
| 19 | +
|
| 20 | +# Collect informations ------ |
| 21 | +options(tutorial.event_recorder = BioDataScience::record_sdd) |
| 22 | +tutorial_options(exercise.checker = BioDataScience::checker_sdd) |
| 23 | +tutorial_options(exercise.timelimit = 60) |
| 24 | +tutorial_options(exercise.cap = "Code R") |
| 25 | +knitr::opts_chunk$set(echo = FALSE, comment = NA) |
| 26 | +
|
| 27 | +# Preparation dataset ------ |
| 28 | +bci <- read("BCI", package = "vegan") |
| 29 | +# sciviews functions --------------- |
| 30 | +
|
| 31 | +SciViews::R() |
| 32 | +library(broom) |
| 33 | +
|
| 34 | +# function mds for several multidimensionnal scaling functions ------ |
| 35 | +mds <- function(dist, k = 2, type = c("metric", "nonmetric", "cmdscale", |
| 36 | + "wcmdscale", "sammon", "isoMDS", "monoMDS", "metaMDS"), p = 2, ...) { |
| 37 | + type <- match.arg(type) |
| 38 | + res <- switch(type, |
| 39 | + metric = , |
| 40 | + wcmdscale = structure(vegan::wcmdscale(d = dist, k = k, eig = TRUE, ...), |
| 41 | + class = c("wcmdscale", "mds", "list")), |
| 42 | + cmdscale = structure(stats::cmdscale(d = dist, k = k, eig = TRUE, ...), |
| 43 | + class = c("cmdscale", "mds", "list")), |
| 44 | + nonmetric = , |
| 45 | + metaMDS = structure(vegan::metaMDS(comm = dist, k = k, ...), |
| 46 | + class = c("metaMDS", "monoMDS", "mds", "list")), |
| 47 | + isoMDS = structure(MASS::isoMDS(d = dist, k = k, ...), |
| 48 | + class = c("isoMDS", "mds", "list")), |
| 49 | + monoMDS = structure(vegan::monoMDS(dist = dist, k = k, ...), |
| 50 | + class = c("monoMDS", "mds", "list")), |
| 51 | + sammon = structure(MASS::sammon(d = dist, k = k, ...), |
| 52 | + class = c("sammon", "mds", "list")), |
| 53 | + stop("Unknown 'mds' type ", type) |
| 54 | + ) |
| 55 | + # For non-metric MDS, we add also data required for the Shepard plot |
| 56 | + if (type %in% c("nonmetric", "sammon", "isoMDS", "monoMDS", "metaMDS")) |
| 57 | + res$Shepard <- MASS::Shepard(d = dist, x = res$points, p = p) |
| 58 | + res |
| 59 | +} |
| 60 | +class(mds) <- c("function", "subsettable_type") |
| 61 | +
|
| 62 | +# plot.mds : MDS2 ~ MDS1 -------------------------------- |
| 63 | +plot.mds <- function(x, y, ...) { |
| 64 | + points <- tibble::as_tibble(x$points, .name_repair = "minimal") |
| 65 | + colnames(points) <- paste0("mds", 1:ncol(points)) |
| 66 | + |
| 67 | + plot(data = points, mds2 ~ mds1,...) |
| 68 | +} |
| 69 | +
|
| 70 | +autoplot.mds <- function(object, labels, ...) { |
| 71 | + points <- tibble::as_tibble(object$points, .name_repair = "minimal") |
| 72 | + colnames(points) <- paste0("mds", 1:ncol(points)) |
| 73 | + |
| 74 | + if (!missing(labels)) { |
| 75 | + if (length(labels) != nrow(points)) |
| 76 | + stop("You must provide a character vector of length ", nrow(points), |
| 77 | + " for 'labels'") |
| 78 | + points$.labels <- labels |
| 79 | + chart::chart(points, mds2 ~ mds1 %label=% .labels, ...) + |
| 80 | + geom_point() + |
| 81 | + ggrepel::geom_text_repel() + |
| 82 | + coord_fixed(ratio = 1) |
| 83 | + } else {# Plot without labels |
| 84 | + chart::chart(points, mds2 ~ mds1, ...) + |
| 85 | + geom_point() + |
| 86 | + coord_fixed(ratio = 1) |
| 87 | + } |
| 88 | +} |
| 89 | +
|
| 90 | +shepard <- function(dist, mds, p = 2) |
| 91 | + structure(MASS::Shepard(d = dist, x = mds$points, p = p), |
| 92 | + class = c("shepard", "list")) |
| 93 | +
|
| 94 | +plot.shepard <- function(x, y, l.col = "red", l.lwd = 1, |
| 95 | + xlab = "Observed Dissimilarity", ylab = "Ordination Distance", ...) { |
| 96 | + she <- tibble::as_tibble(x, .name_repair = "minimal") |
| 97 | + |
| 98 | + plot(data = she, y ~ x, xlab = xlab, ylab = ylab, ...) |
| 99 | + lines(data = she, yf ~ x, type = "S", col = l.col, lwd = l.lwd) |
| 100 | +} |
| 101 | +
|
| 102 | +autoplot.shepard <- function(object, alpha = 0.5, l.col = "red", l.lwd = 1, |
| 103 | + xlab = "Observed Dissimilarity", ylab = "Ordination Distance", ...) { |
| 104 | + she <- tibble::as_tibble(object) |
| 105 | + |
| 106 | + chart(data = she, y ~ x) + |
| 107 | + geom_point(alpha = alpha) + |
| 108 | + geom_step(chart::f_aes(yf ~ x), direction = "vh", col = l.col, lwd = l.lwd) + |
| 109 | + labs(x = xlab, y = ylab) |
| 110 | +} |
| 111 | +
|
| 112 | +# augment.mds ------------------------------------------- |
| 113 | +augment.mds <- function(x, data, ...){ |
| 114 | + points <- as_tibble(x$points) |
| 115 | + colnames(points) <- paste0(".mds", 1:ncol(points)) |
| 116 | + bind_cols(data, points) |
| 117 | +} |
| 118 | +
|
| 119 | +# glance.mds ------------------------------------------- |
| 120 | +glance.mds <- function(x, ...){ |
| 121 | + if ("GOF" %in% names(x)) {# Probably cmdscale() or wcmdscale() => metric MDS |
| 122 | + tibble::tibble(GOF1 = x$GOF[1], GOF2 = x$GOF[2]) |
| 123 | + } else {# Non metric MDS |
| 124 | + # Calculate linear and non linear R^2 from the Shepard (stress) plot |
| 125 | + tibble::tibble( |
| 126 | + linear_R2 = cor(x$Shepard$y, x$Shepard$yf)^2, |
| 127 | + nonmetric_R2 = 1 - sum(vegan::goodness(x)^2) |
| 128 | + ) |
| 129 | + } |
| 130 | +} |
| 131 | +``` |
| 132 | + |
| 133 | +```{r, echo=FALSE} |
| 134 | +fixedRow( |
| 135 | + column(9, div( |
| 136 | + img(src = 'images/BioDataScience-128.png', align = "left"), |
| 137 | + h1("Science des données biologiques 2"), |
| 138 | + "Réalisé par le service d'Écologie numérique des Milieux aquatiques, Université de Mons (Belgique)" |
| 139 | + )), |
| 140 | + column(3, div( |
| 141 | + textInput("user", "Utilisateur :", ""), |
| 142 | + textInput("email", "Email :", "") |
| 143 | + )) |
| 144 | +) |
| 145 | +textOutput("user") # This is newer shown, but required to trigger an event! |
| 146 | +textOutput("email") # Idem! |
| 147 | +``` |
| 148 | + |
| 149 | +```{r, context="server"} |
| 150 | +output$user <- renderText({BioDataScience::user_name(input$user);""}) |
| 151 | +output$email <- renderText({BioDataScience::user_email(input$email);""}) |
| 152 | +updateTextInput(session, "user", value = BioDataScience::user_name()) |
| 153 | +updateTextInput(session, "email", value = BioDataScience::user_email()) |
| 154 | +``` |
| 155 | + |
| 156 | +## Préambule |
| 157 | + |
| 158 | +Si vous n'avez jamais utilisé de tutoriel "learnr", familiarisez-vous d'abord avec son interface [ici](http://biodatascience-course.sciviews.org/sdd-umons/learnr.html). |
| 159 | + |
| 160 | + |
| 161 | + |
| 162 | +**Ne vous trompez pas dans votre adresse mail et votre identifiant Github** |
| 163 | + |
| 164 | +**N'oubliez pas de soumettre votre réponse après chaque exercice** |
| 165 | + |
| 166 | +> Conformément au RGPD ([Règlement Général sur la Protection des Données](https://ec.europa.eu/info/law/law-topic/data-protection/reform/rules-business-and-organisations/principles-gdpr_fr)), nous sommes tenus de vous informer de ce que vos résultats seront collecté afin de suivre votre progression. **Les données seront enregistrées au nom de l'utilisateur apparaissant en haut de cette page. Corrigez si nécessaire !** En utilisant ce tutoriel, vous marquez expressément votre accord pour que ces données puissent être collectées par vos enseignants et utilisées pour vous aider et vous évaluer. Après avoir été anonymisées, ces données pourront également servir à des études globales dans un cadre scientifique et/ou éducatif uniquement. |
| 167 | +
|
| 168 | +## Contexte |
| 169 | + |
| 170 | +Des scientifiques ont réalisés des mesures sur l'Île Barro Colorado, Panama. Il s'agit d'une ile artificielle située sur le lac Gatùn. |
| 171 | + |
| 172 | +Le jeu de données se nomme `BCI` et provient du package `vegan`. Pour plus de simplicité nous le nommerons `bci`. |
| 173 | + |
| 174 | +```{r, echo = TRUE} |
| 175 | +bci <- read("BCI", package = "vegan") |
| 176 | +``` |
| 177 | + |
| 178 | +## MDS simplifiée sous SciViews::R |
| 179 | + |
| 180 | +Afin de vous simplifier la réalisation des mds, des fonctions sont mises à votre disposition. |
| 181 | + |
| 182 | +```{r, echo = TRUE} |
| 183 | +SciViews::R() |
| 184 | +library(broom) |
| 185 | +
|
| 186 | +# function mds for several multidimensionnal scaling functions ------ |
| 187 | +mds <- function(dist, k = 2, type = c("metric", "nonmetric", "cmdscale", |
| 188 | +"wcmdscale", "sammon", "isoMDS", "monoMDS", "metaMDS"), p = 2, ...) { |
| 189 | + type <- match.arg(type) |
| 190 | + res <- switch(type, |
| 191 | + metric = , |
| 192 | + wcmdscale = structure(vegan::wcmdscale(d = dist, k = k, eig = TRUE, ...), |
| 193 | + class = c("wcmdscale", "mds", "list")), |
| 194 | + cmdscale = structure(stats::cmdscale(d = dist, k = k, eig = TRUE, ...), |
| 195 | + class = c("cmdscale", "mds", "list")), |
| 196 | + nonmetric = , |
| 197 | + metaMDS = structure(vegan::metaMDS(comm = dist, k = k, ...), |
| 198 | + class = c("metaMDS", "monoMDS", "mds", "list")), |
| 199 | + isoMDS = structure(MASS::isoMDS(d = dist, k = k, ...), |
| 200 | + class = c("isoMDS", "mds", "list")), |
| 201 | + monoMDS = structure(vegan::monoMDS(dist = dist, k = k, ...), |
| 202 | + class = c("monoMDS", "mds", "list")), |
| 203 | + sammon = structure(MASS::sammon(d = dist, k = k, ...), |
| 204 | + class = c("sammon", "mds", "list")), |
| 205 | + stop("Unknown 'mds' type ", type) |
| 206 | + ) |
| 207 | + # For non-metric MDS, we add also data required for the Shepard plot |
| 208 | + if (type %in% c("nonmetric", "sammon", "isoMDS", "monoMDS", "metaMDS")) |
| 209 | + res$Shepard <- MASS::Shepard(d = dist, x = res$points, p = p) |
| 210 | + res |
| 211 | +} |
| 212 | +class(mds) <- c("function", "subsettable_type") |
| 213 | +
|
| 214 | +# plot.mds : MDS2 ~ MDS1 -------------------------------- |
| 215 | +plot.mds <- function(x, y, ...) { |
| 216 | + points <- tibble::as_tibble(x$points, .name_repair = "minimal") |
| 217 | + colnames(points) <- paste0("mds", 1:ncol(points)) |
| 218 | + |
| 219 | + plot(data = points, mds2 ~ mds1,...) |
| 220 | +} |
| 221 | +
|
| 222 | +autoplot.mds <- function(object, labels, ...) { |
| 223 | + points <- tibble::as_tibble(object$points, .name_repair = "minimal") |
| 224 | + colnames(points) <- paste0("mds", 1:ncol(points)) |
| 225 | + |
| 226 | + if (!missing(labels)) { |
| 227 | + if (length(labels) != nrow(points)) |
| 228 | + stop("You must provide a character vector of length ", nrow(points), |
| 229 | + " for 'labels'") |
| 230 | + points$.labels <- labels |
| 231 | + chart::chart(points, mds2 ~ mds1 %label=% .labels, ...) + |
| 232 | + geom_point() + |
| 233 | + ggrepel::geom_text_repel() + |
| 234 | + coord_fixed(ratio = 1) |
| 235 | + } else {# Plot without labels |
| 236 | + chart::chart(points, mds2 ~ mds1, ...) + |
| 237 | + geom_point() + |
| 238 | + coord_fixed(ratio = 1) |
| 239 | + } |
| 240 | +} |
| 241 | +
|
| 242 | +shepard <- function(dist, mds, p = 2) |
| 243 | + structure(MASS::Shepard(d = dist, x = mds$points, p = p), |
| 244 | + class = c("shepard", "list")) |
| 245 | +
|
| 246 | +plot.shepard <- function(x, y, l.col = "red", l.lwd = 1, |
| 247 | +xlab = "Observed Dissimilarity", ylab = "Ordination Distance", ...) { |
| 248 | + she <- tibble::as_tibble(x, .name_repair = "minimal") |
| 249 | + |
| 250 | + plot(data = she, y ~ x, xlab = xlab, ylab = ylab, ...) |
| 251 | + lines(data = she, yf ~ x, type = "S", col = l.col, lwd = l.lwd) |
| 252 | +} |
| 253 | +
|
| 254 | +autoplot.shepard <- function(object, alpha = 0.5, l.col = "red", l.lwd = 1, |
| 255 | +xlab = "Observed Dissimilarity", ylab = "Ordination Distance", ...) { |
| 256 | + she <- tibble::as_tibble(object) |
| 257 | + |
| 258 | + chart(data = she, y ~ x) + |
| 259 | + geom_point(alpha = alpha) + |
| 260 | + geom_step(chart::f_aes(yf ~ x), direction = "vh", col = l.col, lwd = l.lwd) + |
| 261 | + labs(x = xlab, y = ylab) |
| 262 | +} |
| 263 | +
|
| 264 | +# augment.mds ------------------------------------------- |
| 265 | +augment.mds <- function(x, data, ...){ |
| 266 | + points <- as_tibble(x$points) |
| 267 | + colnames(points) <- paste0(".mds", 1:ncol(points)) |
| 268 | + bind_cols(data, points) |
| 269 | +} |
| 270 | +
|
| 271 | +# glance.mds ------------------------------------------- |
| 272 | +glance.mds <- function(x, ...){ |
| 273 | + if ("GOF" %in% names(x)) {# Probably cmdscale() or wcmdscale() => metric MDS |
| 274 | + tibble::tibble(GOF1 = x$GOF[1], GOF2 = x$GOF[2]) |
| 275 | + } else {# Non metric MDS |
| 276 | + # Calculate linear and non linear R^2 from the Shepard (stress) plot |
| 277 | + tibble::tibble( |
| 278 | + linear_R2 = cor(x$Shepard$y, x$Shepard$yf)^2, |
| 279 | + nonmetric_R2 = 1 - sum(vegan::goodness(x)^2) |
| 280 | + ) |
| 281 | + } |
| 282 | +} |
| 283 | +``` |
| 284 | + |
| 285 | +## Analyse en coordonnées principales (ou MDS métrique) |
| 286 | + |
| 287 | +Réalisez une PCoA sur le jeu de données bci avec une matrice de distance de `Canberra`. |
| 288 | + |
| 289 | +```{r, echo = TRUE, eval = FALSE} |
| 290 | +# function de base |
| 291 | +dist. <- vegan::vegdist(x, method = "bray") |
| 292 | +mds. <- mds$metric(dist.) |
| 293 | +
|
| 294 | +autoplot(mds.) |
| 295 | +glance(mds.) |
| 296 | +``` |
| 297 | + |
| 298 | +```{r mds, exercise = TRUE} |
| 299 | +summary(bci) |
| 300 | +``` |
| 301 | + |
| 302 | +```{r mds-hint-1} |
| 303 | +dist. <- vegan::vegdist(x, method = "bray") |
| 304 | +mds. <- mds$metric(dist.) |
| 305 | +autplot(mds.) |
| 306 | +glance(mds.) |
| 307 | +``` |
| 308 | + |
| 309 | +```{r mds-hint-2} |
| 310 | +dist. <- vegan::vegdist(bci, method = "canberra") |
| 311 | +mds. <- mds$metric(dist.) |
| 312 | +autoplot(mds.) |
| 313 | +glance(mds.) |
| 314 | +``` |
| 315 | + |
| 316 | +```{r mds-check} |
| 317 | +# TODO |
| 318 | +``` |
| 319 | + |
| 320 | +```{r qu_mds} |
| 321 | +question("Considérez vous que cette PCoA est une bonne PCoA ?", |
| 322 | + answer("oui, sur base des valeurs de goodness-of-fit, |
| 323 | + nous pouvons observer que le PCoA exprime une |
| 324 | + grande part de la variance"), |
| 325 | + answer("non, sur base des valeurs de goodness-of-fit, |
| 326 | + nous pouvons observer que le PCoA n'exprime pas |
| 327 | + une grande part de la variance. Il est préférable |
| 328 | + de réaliser une MDS non métrique.", |
| 329 | + correct = TRUE), |
| 330 | + allow_retry = TRUE) |
| 331 | +``` |
| 332 | + |
| 333 | +## MDS non métrique |
| 334 | + |
| 335 | +```{r, echo = TRUE, eval = FALSE} |
| 336 | +# function de base |
| 337 | +dist. <- vegan::vegdist(x, method = "bray") |
| 338 | +mds. <- mds$nonmetric(dist.) |
| 339 | +
|
| 340 | +autoplot(mds.) |
| 341 | +glance(mds.) |
| 342 | +``` |
| 343 | + |
| 344 | +```{r nmds, exercise = TRUE} |
| 345 | +summary(bci) |
| 346 | +``` |
| 347 | + |
| 348 | +```{r nmds-hint-1} |
| 349 | +dist. <- vegan::vegdist(x, method = "bray") |
| 350 | +mds. <- mds$nonmetric(dist.) |
| 351 | +autplot(mds.) |
| 352 | +glance(mds.) |
| 353 | +``` |
| 354 | + |
| 355 | +```{r nmds-hint-2} |
| 356 | +dist. <- vegan::vegdist(bci, method = "canberra") |
| 357 | +mds. <- mds$nonmetric(dist.) |
| 358 | +autoplot(mds.) |
| 359 | +glance(mds.) |
| 360 | +``` |
| 361 | + |
| 362 | +```{r nmds-check} |
| 363 | +# TODO |
| 364 | +``` |
| 365 | + |
| 366 | +```{r qu_nmds} |
| 367 | +question("Considérez vous que cette MDS non métrique est une |
| 368 | + MDS non métrique de qualité?", |
| 369 | + answer("oui, sur base de la valeur du R^2 linéaire élevée, |
| 370 | + nous pouvons considérer que la MDS non métrique |
| 371 | + est exprime une grande part de la variance"), |
| 372 | + answer("oui, sur base des valeurs de R^2 linéaire et de R^2 |
| 373 | + non métrique élevées, nous pouvons considérer que la |
| 374 | + MDS non métrique est de qualité"), |
| 375 | + answer("oui, sur base de la valeur du R^2 non métrique élevée, |
| 376 | + nous pouvons considérer que la MDS non métrique |
| 377 | + est de qualité", correct = TRUE), |
| 378 | + answer("non, sur base des valeurs R^2 linéaire et de R^2 |
| 379 | + non métrique élevées, nous pouvons considérer |
| 380 | + que la MDS non métrique n'est pas optimale."), |
| 381 | + allow_retry = TRUE) |
| 382 | +``` |
| 383 | + |
| 384 | + |
| 385 | +## Conclusion |
| 386 | + |
| 387 | +Vous venez de terminer votre séance d'exercice. |
| 388 | + |
| 389 | +Laissez nous vos impressions sur cet outil pédagogique ou expérimentez encore dans la zone ci-dessous. Rappelez-vous que pour placer un commentaire dans une zone de code R, vous devez utilisez un dièse (`#`) devant vos phrases. |
| 390 | + |
| 391 | +```{r comm, exercise=TRUE, exercise.lines = 8} |
| 392 | +# Ajout de commentaires |
| 393 | +# ... |
| 394 | +``` |
| 395 | + |
| 396 | +```{r comm-check} |
| 397 | +# Not yet... |
| 398 | +``` |
0 commit comments