Go over vignettes and fix

welch-lab · Mar 16, 2024 · 7433de5 · 7433de5
1 parent 2f3997c
commit 7433de5
Show file tree

Hide file tree

Showing 18 changed files with 181 additions and 274 deletions.
diff --git a/R/ATAC.R b/R/ATAC.R
@@ -356,6 +356,7 @@ linkGenesAndPeaks <- function(
 #' @return No return value. A file located at \code{outputPath} will be created.
 #' @export
 #' @examples
+#' \donttest{
 #' bmmc <- normalize(bmmc)
 #' bmmc <- selectGenes(bmmc)
 #' bmmc <- scaleNotCenter(bmmc)
@@ -376,6 +377,7 @@ linkGenesAndPeaks <- function(
 #'     )
 #'     head(read.table(resultPath, skip = 1))
 #' }
+#' }
 exportInteractTrack <- function(
         corrMat,
         pathToCoords,

diff --git a/R/ggplotting.R b/R/ggplotting.R
@@ -588,8 +588,9 @@ plotCellViolin <- function(
 #' @param colorLabels,colorValues Each a vector with as many values as the
 #' number of categories for the categorical coloring aesthetics. Labels will be
 #' the shown text and values will be the color code. These are passed to
-#' \code{\link[ggplot2]{scale_color_manual}}. Default uses \code{scPalette} and
-#' plot original labels (levels of the factor).
+#' \code{\link[ggplot2]{scale_color_manual}}. Default uses an internal selected
+#' palette if there are <= 26 colors needed, or ggplot hues otherwise, and plot
+#' original labels (levels of the factor).
 #' @param legendNRow,legendNCol Integer, when too many categories in one
 #' variable, arranges number of rows or columns. Default \code{NULL},
 #' automatically split to \code{ceiling(levels(variable)/10)} columns.
@@ -641,7 +642,7 @@ plotCellViolin <- function(
         legendNCol = NULL,
         # Coloring
         colorLabels = NULL,
-        colorValues = scPalette,
+        colorValues = NULL,
         colorPalette = "magma",
         colorDirection = -1,
         naColor = "#DEDEDE",
@@ -741,9 +742,13 @@ plotCellViolin <- function(
                 colorLabels <- levels(plot$data[[varName]])
             }
             if (is.null(colorValues)) {
-                colorValues <- scales::hue_pal()(
-                    length(levels(plot$data[[varName]]))
-                )
+                if (nlevels(plot$data[[varName]]) <= length(scPalette))
+                    colorValues <- scPalette[seq_len(nlevels(plot$data[[varName]]))]
+                else {
+                    colorValues <- scales::hue_pal()(
+                        length(levels(plot$data[[varName]]))
+                    )
+                }
             }
             if (a %in% c("colour", "fill")) {
                 plot <- plot +

diff --git a/R/liger-methods.R b/R/liger-methods.R
@@ -595,14 +595,14 @@ setReplaceMethod(
                 # No name matching, require exact length
                 value <- .checkArgLen(value, n = length(cellIdx), class = c("vector", "factor"))
             } else {
-                if (!all(barcodes %in% names(value))) {
+                if (!all(endsWith(barcodes, names(value)))) {
                     cli::cli_abort(
-                        c("{.code names(value)} do not contain all cells selected. ",
-                          "These are not involved: ",
-                          "{.val {barcodes[!barcodes %in% names(value)]}}")
+                        c("x" = "{.code names(value)} do match to cells selected. ",
+                          "i" = "The first three given names: {.val {names(value)[1:3]}}",
+                          "i" = "The first three selected names: {.val {barocdes[1:3]}}")
                     )
                 }
-                value <- value[barcodes]
+                names(value) <- barcodes
             }
         } else {
             # matrix like
@@ -873,9 +873,9 @@ setReplaceMethod(
     function(x, dataset = NULL, check = TRUE, value) {
         dataset <- .checkUseDatasets(x, dataset)
         if (length(dataset) != 1) cli::cli_abort("Need to specify one dataset to insert.")
-        if (isH5Liger(x, dataset))
-            cli::cli_abort("Cannot replace slot with in-memory data for H5 based object.")
-        x@datasets[[dataset]]@scaleUnsharedData <- value
+        ld <- dataset(x, dataset)
+        scaleUnsharedData(ld) <- value
+        x@datasets[[dataset]] <- ld
         if (isTRUE(check)) methods::validObject(x)
         x
     }
@@ -889,9 +889,9 @@ setReplaceMethod(
     function(x, dataset = NULL, check = TRUE, value) {
         dataset <- .checkUseDatasets(x, dataset)
         if (length(dataset) != 1) cli::cli_abort("Need to specify one dataset to insert.")
-        if (!isH5Liger(x, dataset))
-            cli::cli_abort("Cannot replace slot with on-disk data for in-memory object.")
-        x@datasets[[dataset]]@scaleUnsharedData <- value
+        ld <- dataset(x, dataset)
+        scaleUnsharedData(ld) <- value
+        x@datasets[[dataset]] <- ld
         if (isTRUE(check)) methods::validObject(x)
         x
     }
@@ -907,7 +907,9 @@ setReplaceMethod(
         if (length(dataset) != 1) cli::cli_abort("Need to specify one dataset to insert.")
         if (!isH5Liger(x, dataset))
             cli::cli_abort("Cannot replace slot with on-disk data for in-memory object.")
-        x@datasets[[dataset]]@scaleUnsharedData <- value
+        ld <- dataset(x, dataset)
+        scaleUnsharedData(ld) <- value
+        x@datasets[[dataset]] <- ld
         if (isTRUE(check)) methods::validObject(x)
         x
     }
@@ -1140,7 +1142,7 @@ setReplaceMethod(
     signature(x = "liger", value = "list"),
     function(x, value) {
         x@dimReds <- value
-        validObject(x)
+        methods::validObject(x)
         return(x)
     }
 )

diff --git a/R/ligerDataset-methods.R b/R/ligerDataset-methods.R
@@ -408,6 +408,15 @@ setReplaceMethod(
     function(x, check = TRUE, value) {
         if (isH5Liger(x))
             cli::cli_abort("Cannot replace slot with in-memory data for H5 based object.")
+        if (!is.null(value)) {
+            bc <- x@colnames
+            if (!all(endsWith(bc, colnames(value)))) {
+                cli::cli_abort(
+                    "Column names of {.var value} do not match to those of the object."
+                )
+            }
+            colnames(value) <- bc
+        }
         x@scaleUnsharedData <- value
         if (isTRUE(check)) methods::validObject(x)
         x

diff --git a/R/subsetObject.R b/R/subsetObject.R
@@ -320,9 +320,6 @@ subsetH5LigerDataset <- function(
             path <- dirname(oldFN)
             filename <- tempfile(pattern = paste0(bn, ".subset_"),
                                  fileext = ".h5", tmpdir = path)
-            # filename <- paste0(oldFN, ".subset_",
-            #                    format(Sys.time(), "%y%m%d_%H%M%S"),
-            #                    ".h5")
         } else if (is.null(filename) && !is.null(filenameSuffix)) {
             oldFN <- h5fileInfo(object, "filename")
             filename <- paste0(oldFN, ".", filenameSuffix, ".h5")
@@ -374,6 +371,8 @@ subsetH5LigerDatasetToMem <- function(
     }
     modal <- modalOf(object)
     cellIdx <- .idxCheck(object, cellIdx, "cell")
+    if (is.null(featureIdx)) noFeatureIdx <- TRUE
+    else noFeatureIdx <- FALSE
     featureIdx <- .idxCheck(object, featureIdx, "feature")
     # Having `useSlot` as a record of user specification, to know whether it's
     # a `NULL` but involve everything, or just a few slots.
@@ -418,9 +417,16 @@ subsetH5LigerDatasetToMem <- function(
     # Process scaled data ####
     secondIdx <- NULL
     if (!is.null(scaleData(object))) {
-        # See comments in h5ToH5 for what the following two mean
+        # scaledFeatureIdx indicates where each feature in scaled data is from
+        # the dataset.
         scaledFeatureIdx <- scaleData(object)[["featureIdx"]][]
-        secondIdx <- as.numeric(stats::na.omit(match(featureIdx, scaledFeatureIdx)))
+        # `secondIdx` is used to subscribe scaleData and V, so that result
+        # after subset match with the order requested by `featureIdx`
+        if (noFeatureIdx) {
+            secondIdx <- seq_along(scaledFeatureIdx)
+        } else {
+            secondIdx <- as.numeric(stats::na.omit(match(featureIdx, scaledFeatureIdx)))
+        }
     }
     if ("scaleData" %in% slotInvolved & !is.null(scaleData(object))) {
         if (isTRUE(verbose))
@@ -538,6 +544,8 @@ subsetH5LigerDatasetToH5 <- function(
     }
     modal <- modalOf(object)
     cellIdx <- .idxCheck(object, cellIdx, "cell")
+    if (is.null(featureIdx)) noFeatureIdx <- TRUE
+    else noFeatureIdx <- FALSE
     featureIdx <- .idxCheck(object, featureIdx, "feature")
     useSlot <- .checkLDSlot(object, useSlot)
 
@@ -672,26 +680,17 @@ subsetH5LigerDatasetToH5 <- function(
     }
     # Process Scaled Data ####
     secondIdx <- NULL
-    # if (getH5File(object)$exists("scaleData.featureIdx")) {
-    #     scaledFeatureIdx <- getH5File(object)[["scaleData.featureIdx"]][]
-    # } else if ("selected" %in% names(featureMeta(object))) {
-    #     scaledFeatureIdx <- which(featureMeta(object)$selected)
-    # } else if (!is.null(object@V)) {
-    #     scaleFeatureIdx <- rownames(object@V) %in% rownames(object)[featureIdx]
-    # } else {
-    #     if ("scaleData" %in% useSlot & !is.null(scaleData(object))) {
-    #         warning("Unable to know what features are included scaled data. ",
-    #                 "Skipped.")
-    #     }
-    # }
     if (!is.null(scaleData(object))) {
-        # This asserts that
-        # identical(rownames(object)[scaledFeatureIdx], rownames(scaleData))
+        # scaledFeatureIdx indicates where each feature in scaled data is from
+        # the dataset.
         scaledFeatureIdx <- scaleData(object)[["featureIdx"]][]
-        # This asserts that
-        # rownames(scaleData)[secondIdx] returns a subset that follows the order
-        # specified by featureIdx
-        secondIdx <- as.numeric(stats::na.omit(match(featureIdx, scaledFeatureIdx)))
+        # `secondIdx` is used to subscribe scaleData and V, so that result
+        # after subset match with the order requested by `featureIdx`
+        if (noFeatureIdx) {
+            secondIdx <- seq_along(scaledFeatureIdx)
+        } else {
+            secondIdx <- as.numeric(stats::na.omit(match(featureIdx, scaledFeatureIdx)))
+        }
     }
     if ("scaleData" %in% useSlot & !is.null(scaleData(object))) {
         scaledFeatureIdxNew <- which(featureIdx %in% scaledFeatureIdx)

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -1,6 +1,15 @@
 template:
-  params:
-    bootswatch: flatly
+  bootstrap: 5
+  theme: flatly
+  bslib:
+    bg: "#202123"
+    fg: "#B8BCC2"
+    primary: "#306cc9"
+    border-radius: 0.5rem
+    btn-border-radius: 0.25rem
+  includes:
+    in_header:
+      <script defer data-domain="welch-lab.github.io/liger" src="https://plausible.io/js/plausible.js"></script>
 
 reference:
 - title: "Read Data"

diff --git a/docs/articles/img/liger_class_structure.png b/docs/articles/img/liger_class_structure.png
diff --git a/man/dot-ggplotLigerTheme.Rd b/man/dot-ggplotLigerTheme.Rd
diff --git a/man/exportInteractTrack.Rd b/man/exportInteractTrack.Rd
diff --git a/tests/testthat/test_object.R b/tests/testthat/test_object.R
@@ -424,7 +424,7 @@ test_that("as.liger methods", {
         expect_message(lig <- as.liger(seu))
         expect_true(all.equal(sapply(datasets(lig), ncol), c(a = 150, b = 150)))
 
-        expect_in(paste0("pca.", 1:10), colnames(cellMeta(lig, as.data.frame = TRUE)))
+        expect_in("pca", names(dimReds(lig)))
     }
 })
 

diff --git a/vignettes/articles/Integrating_multi_scRNA_data.rmd b/vignettes/articles/Integrating_multi_scRNA_data.rmd
@@ -10,7 +10,7 @@ output:
 ---
 
 ```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, cache = TRUE)
+knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
 ```
 
 This guide will demonstrate the usage of the Liger package in the style of the R Console, which can be accessed through an R development environment (e.g., RStudio) or directly from the R command line.

diff --git a/vignettes/articles/Integrating_scRNA_and_scATAC_data.Rmd b/vignettes/articles/Integrating_scRNA_and_scATAC_data.Rmd
@@ -9,7 +9,7 @@ output:
 ---
 
 ```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, cache = TRUE)
+knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
 ```
 
 In this tutorial, we will demonstrate LIGER’s ability to jointly define cell types by leveraging multiple single-cell modalities. For example, the integration of single-cell RNA-seq and single-cell ATAC-seq enables cell type definitions that incorporate both gene expression and chromatin accessibility data. Such joint analysis allows not only the taxonomic categorization of cell types, but also a deeper understanding of their underlying regulatory networks. The pipeline for jointly analyzing scRNA-seq and scATAC-seq is similar to that for integrating multiple scRNA-seq datasets in that both rely on joint matrix factorization and quantile normalization. The main differences are: (1) scATAC-seq data needs to be processed into gene-level values; (2) gene selection is performed on the scRNA-seq data only; and (3) downstream analyses can use both gene-level and intergenic information.

diff --git a/vignettes/articles/SNAREseq_walkthrough.Rmd b/vignettes/articles/SNAREseq_walkthrough.Rmd
@@ -10,7 +10,7 @@ output: html_document
 knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
 ```
 
-Here we integrate the scATAC and scRNA reads from the dual-omics dataset SNARE-seq as an illustration of how UINMF can be used to integrate cross-modality data. For this tutorial, we will use three matrices, which can all be downloaded at https://www.dropbox.com/sh/y9kjoum8u469nj1/AADik2b2-Qo3os2QSWXdIAbna?dl=0
+Here we integrate the scATAC and scRNA reads from the dual-omics dataset SNARE-seq as an illustration of how UINMF can be used to integrate cross-modality data. For this tutorial, we will use three matrices, which can all be downloaded from our [Dropbox folder](https://www.dropbox.com/scl/fo/ea666n049h762lla8c5iz/h?rlkey=796slf2ybnv2kmcxxbwluvv2b&dl=0)
 
 - The transcriptomic measures `SNAREseq_RNA.RDS` is the SNARE-seq scRNA dataset (31,367 genes by 10,309 cells).
 - For the shared epigenomic features `SNARE_seq_shared_chromatin_features.RDS`, we create a gene-centric matrix, such that we sum of the number of accessibiltiy peaks that occur over the gene body and promoter regions for each gene. For a detailed walkthough of how to generate such a matrix, please see our [Integrating scRNA and scATAC data vignette](Integrating_scRNA_and_scATAC_data.html). The resulting matrix of gene-centric chromatin accessibility is 22,379 genes by 10,309 cells
@@ -24,28 +24,27 @@ library(rliger)
 rna <- readRDS("SNAREseq_data/SNAREseq_RNA.RDS")
 atac_shared <- readRDS("SNAREseq_data/SNAREseq_chromatin_accessibility_shared.RDS")
 atac_unshared <- readRDS("SNAREseq_data/SNARE_seq_unshared_chromatin_features.RDS")
-atac_unshared <- as(atac_unshared, "CsparseMatrix")
 ```
 
 ## Step 2: Preprocessing
 
 Integrative non-negative matrix factorization with unshared features (UINMF) performs factorization using shared feature matrix of all datasets and includes unshared features from dataset(s) with such information. In this tutorial, we plan to use the variable feature selected from the scRNA dataset. Since we prepared the gene-centric matrix from the scATAC dataset, these genes can be accounted as the shared features. For the unshared features, normally we select variable genes that are out of the intersection of gene sets from all datasets. In this tutorial, we directly use the peaks that are identified variable. Therefore, special processing will be needed compared to the other UINMF tutorials. 
 
-### 1. Create liger object for shared genes
+### 2.1: Create liger object for shared genes
 
 ```{r create}
 lig <- createLiger(list(rna = rna, atac = atac_shared))
 ```
 
-### 2. Normalize and select shared variable features
+### 2.2: Normalize and select shared variable features
 
 ```{r norm}
 lig <- normalize(lig) %>%
     selectGenes(useDatasets = "rna", thresh = 0.1) %>%
     scaleNotCenter()
 ```
 
-## 3. Selecting the unshared features
+## Step 3: Selecting the unshared features
 
 When selecting unshared features for the UINMF integration, it is critical to consider the type of data you are working with. For unshared features that gene-centric, the user should follow the feature selection process outlined in the ['Integrating unshared features with UINMF' tutorial](UINMF_vignette.html).
 
@@ -77,20 +76,18 @@ Add the unshared features that have been properly selected, such that they are a
 
 ```{r addunshared}
 varUnsharedFeatures(lig, "atac") <- top2000
-scaleUnsharedData(lig, "atac", check = FALSE) <- unshareScaled
+scaleUnsharedData(lig, "atac") <- unshareScaled
 ```
 
->The latest version of rliger package is equipped with strict checkpoints on object validity, including the matching of features and cells between matrices belonging to the same dataset. Given that the special processing method we are introducing in this vignette, that we use the intergenic bins as unshared features, we have to turn the checks off with `check = FALSE` when inserting the scaled unshared features into the object.
-
-## 4. Joint Matrix Factorization
+## Step 4: Joint Matrix Factorization
 
 To factorize the datasets and include the unshared datasets, set `useUnshared = TRUE`. 
 
 ```{r factorization}
 lig <- runUINMF(lig, k = 30, nIteration = 30)
 ```
 
-## 5. Quantile Normalization and Joint Clustering
+## Step 5: Quantile Normalization and Joint Clustering
 
 After factorization, the resulting Liger object can be used in all downstream LIGER functions without adjustment. The default reference dataset for quantile normalization is the larger dataset, but the user should select the higher quality dataset as the reference dataset, even if it is the smaller dataset.
 
@@ -100,7 +97,7 @@ lig <- quantileNorm(lig)
 lig <- runCluster(lig, resolution = 0.8, nNeighbors = 30)
 ```
 
-## 6. Visualizations and Downstream processing
+## Step 6: Visualizations and Downstream processing
 
 ```{r runumap}
 lig <- runUMAP(lig, nNeighbors = 30)