Skip to content

Commit

Permalink
Go over vignettes and fix
Browse files Browse the repository at this point in the history
  • Loading branch information
mvfki committed Mar 16, 2024
1 parent 2f3997c commit 7433de5
Show file tree
Hide file tree
Showing 18 changed files with 181 additions and 274 deletions.
2 changes: 2 additions & 0 deletions R/ATAC.R
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ linkGenesAndPeaks <- function(
#' @return No return value. A file located at \code{outputPath} will be created.
#' @export
#' @examples
#' \donttest{
#' bmmc <- normalize(bmmc)
#' bmmc <- selectGenes(bmmc)
#' bmmc <- scaleNotCenter(bmmc)
Expand All @@ -376,6 +377,7 @@ linkGenesAndPeaks <- function(
#' )
#' head(read.table(resultPath, skip = 1))
#' }
#' }
exportInteractTrack <- function(
corrMat,
pathToCoords,
Expand Down
17 changes: 11 additions & 6 deletions R/ggplotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -588,8 +588,9 @@ plotCellViolin <- function(
#' @param colorLabels,colorValues Each a vector with as many values as the
#' number of categories for the categorical coloring aesthetics. Labels will be
#' the shown text and values will be the color code. These are passed to
#' \code{\link[ggplot2]{scale_color_manual}}. Default uses \code{scPalette} and
#' plot original labels (levels of the factor).
#' \code{\link[ggplot2]{scale_color_manual}}. Default uses an internal selected
#' palette if there are <= 26 colors needed, or ggplot hues otherwise, and plot
#' original labels (levels of the factor).
#' @param legendNRow,legendNCol Integer, when too many categories in one
#' variable, arranges number of rows or columns. Default \code{NULL},
#' automatically split to \code{ceiling(levels(variable)/10)} columns.
Expand Down Expand Up @@ -641,7 +642,7 @@ plotCellViolin <- function(
legendNCol = NULL,
# Coloring
colorLabels = NULL,
colorValues = scPalette,
colorValues = NULL,
colorPalette = "magma",
colorDirection = -1,
naColor = "#DEDEDE",
Expand Down Expand Up @@ -741,9 +742,13 @@ plotCellViolin <- function(
colorLabels <- levels(plot$data[[varName]])
}
if (is.null(colorValues)) {
colorValues <- scales::hue_pal()(
length(levels(plot$data[[varName]]))
)
if (nlevels(plot$data[[varName]]) <= length(scPalette))
colorValues <- scPalette[seq_len(nlevels(plot$data[[varName]]))]
else {
colorValues <- scales::hue_pal()(
length(levels(plot$data[[varName]]))
)
}
}
if (a %in% c("colour", "fill")) {
plot <- plot +
Expand Down
28 changes: 15 additions & 13 deletions R/liger-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -595,14 +595,14 @@ setReplaceMethod(
# No name matching, require exact length
value <- .checkArgLen(value, n = length(cellIdx), class = c("vector", "factor"))
} else {
if (!all(barcodes %in% names(value))) {
if (!all(endsWith(barcodes, names(value)))) {
cli::cli_abort(
c("{.code names(value)} do not contain all cells selected. ",
"These are not involved: ",
"{.val {barcodes[!barcodes %in% names(value)]}}")
c("x" = "{.code names(value)} do match to cells selected. ",
"i" = "The first three given names: {.val {names(value)[1:3]}}",
"i" = "The first three selected names: {.val {barocdes[1:3]}}")
)
}
value <- value[barcodes]
names(value) <- barcodes
}
} else {
# matrix like
Expand Down Expand Up @@ -873,9 +873,9 @@ setReplaceMethod(
function(x, dataset = NULL, check = TRUE, value) {
dataset <- .checkUseDatasets(x, dataset)
if (length(dataset) != 1) cli::cli_abort("Need to specify one dataset to insert.")
if (isH5Liger(x, dataset))
cli::cli_abort("Cannot replace slot with in-memory data for H5 based object.")
x@datasets[[dataset]]@scaleUnsharedData <- value
ld <- dataset(x, dataset)
scaleUnsharedData(ld) <- value
x@datasets[[dataset]] <- ld
if (isTRUE(check)) methods::validObject(x)
x
}
Expand All @@ -889,9 +889,9 @@ setReplaceMethod(
function(x, dataset = NULL, check = TRUE, value) {
dataset <- .checkUseDatasets(x, dataset)
if (length(dataset) != 1) cli::cli_abort("Need to specify one dataset to insert.")
if (!isH5Liger(x, dataset))
cli::cli_abort("Cannot replace slot with on-disk data for in-memory object.")
x@datasets[[dataset]]@scaleUnsharedData <- value
ld <- dataset(x, dataset)
scaleUnsharedData(ld) <- value
x@datasets[[dataset]] <- ld
if (isTRUE(check)) methods::validObject(x)
x
}
Expand All @@ -907,7 +907,9 @@ setReplaceMethod(
if (length(dataset) != 1) cli::cli_abort("Need to specify one dataset to insert.")
if (!isH5Liger(x, dataset))
cli::cli_abort("Cannot replace slot with on-disk data for in-memory object.")
x@datasets[[dataset]]@scaleUnsharedData <- value
ld <- dataset(x, dataset)
scaleUnsharedData(ld) <- value
x@datasets[[dataset]] <- ld
if (isTRUE(check)) methods::validObject(x)
x
}
Expand Down Expand Up @@ -1140,7 +1142,7 @@ setReplaceMethod(
signature(x = "liger", value = "list"),
function(x, value) {
x@dimReds <- value
validObject(x)
methods::validObject(x)
return(x)
}
)
Expand Down
9 changes: 9 additions & 0 deletions R/ligerDataset-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,15 @@ setReplaceMethod(
function(x, check = TRUE, value) {
if (isH5Liger(x))
cli::cli_abort("Cannot replace slot with in-memory data for H5 based object.")
if (!is.null(value)) {
bc <- x@colnames
if (!all(endsWith(bc, colnames(value)))) {
cli::cli_abort(
"Column names of {.var value} do not match to those of the object."
)
}
colnames(value) <- bc
}
x@scaleUnsharedData <- value
if (isTRUE(check)) methods::validObject(x)
x
Expand Down
45 changes: 22 additions & 23 deletions R/subsetObject.R
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,6 @@ subsetH5LigerDataset <- function(
path <- dirname(oldFN)
filename <- tempfile(pattern = paste0(bn, ".subset_"),
fileext = ".h5", tmpdir = path)
# filename <- paste0(oldFN, ".subset_",
# format(Sys.time(), "%y%m%d_%H%M%S"),
# ".h5")
} else if (is.null(filename) && !is.null(filenameSuffix)) {
oldFN <- h5fileInfo(object, "filename")
filename <- paste0(oldFN, ".", filenameSuffix, ".h5")
Expand Down Expand Up @@ -374,6 +371,8 @@ subsetH5LigerDatasetToMem <- function(
}
modal <- modalOf(object)
cellIdx <- .idxCheck(object, cellIdx, "cell")
if (is.null(featureIdx)) noFeatureIdx <- TRUE
else noFeatureIdx <- FALSE
featureIdx <- .idxCheck(object, featureIdx, "feature")
# Having `useSlot` as a record of user specification, to know whether it's
# a `NULL` but involve everything, or just a few slots.
Expand Down Expand Up @@ -418,9 +417,16 @@ subsetH5LigerDatasetToMem <- function(
# Process scaled data ####
secondIdx <- NULL
if (!is.null(scaleData(object))) {
# See comments in h5ToH5 for what the following two mean
# scaledFeatureIdx indicates where each feature in scaled data is from
# the dataset.
scaledFeatureIdx <- scaleData(object)[["featureIdx"]][]
secondIdx <- as.numeric(stats::na.omit(match(featureIdx, scaledFeatureIdx)))
# `secondIdx` is used to subscribe scaleData and V, so that result
# after subset match with the order requested by `featureIdx`
if (noFeatureIdx) {
secondIdx <- seq_along(scaledFeatureIdx)
} else {
secondIdx <- as.numeric(stats::na.omit(match(featureIdx, scaledFeatureIdx)))
}
}
if ("scaleData" %in% slotInvolved & !is.null(scaleData(object))) {
if (isTRUE(verbose))
Expand Down Expand Up @@ -538,6 +544,8 @@ subsetH5LigerDatasetToH5 <- function(
}
modal <- modalOf(object)
cellIdx <- .idxCheck(object, cellIdx, "cell")
if (is.null(featureIdx)) noFeatureIdx <- TRUE
else noFeatureIdx <- FALSE
featureIdx <- .idxCheck(object, featureIdx, "feature")
useSlot <- .checkLDSlot(object, useSlot)

Expand Down Expand Up @@ -672,26 +680,17 @@ subsetH5LigerDatasetToH5 <- function(
}
# Process Scaled Data ####
secondIdx <- NULL
# if (getH5File(object)$exists("scaleData.featureIdx")) {
# scaledFeatureIdx <- getH5File(object)[["scaleData.featureIdx"]][]
# } else if ("selected" %in% names(featureMeta(object))) {
# scaledFeatureIdx <- which(featureMeta(object)$selected)
# } else if (!is.null(object@V)) {
# scaleFeatureIdx <- rownames(object@V) %in% rownames(object)[featureIdx]
# } else {
# if ("scaleData" %in% useSlot & !is.null(scaleData(object))) {
# warning("Unable to know what features are included scaled data. ",
# "Skipped.")
# }
# }
if (!is.null(scaleData(object))) {
# This asserts that
# identical(rownames(object)[scaledFeatureIdx], rownames(scaleData))
# scaledFeatureIdx indicates where each feature in scaled data is from
# the dataset.
scaledFeatureIdx <- scaleData(object)[["featureIdx"]][]
# This asserts that
# rownames(scaleData)[secondIdx] returns a subset that follows the order
# specified by featureIdx
secondIdx <- as.numeric(stats::na.omit(match(featureIdx, scaledFeatureIdx)))
# `secondIdx` is used to subscribe scaleData and V, so that result
# after subset match with the order requested by `featureIdx`
if (noFeatureIdx) {
secondIdx <- seq_along(scaledFeatureIdx)
} else {
secondIdx <- as.numeric(stats::na.omit(match(featureIdx, scaledFeatureIdx)))
}
}
if ("scaleData" %in% useSlot & !is.null(scaleData(object))) {
scaledFeatureIdxNew <- which(featureIdx %in% scaledFeatureIdx)
Expand Down
13 changes: 11 additions & 2 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
template:
params:
bootswatch: flatly
bootstrap: 5
theme: flatly
bslib:
bg: "#202123"
fg: "#B8BCC2"
primary: "#306cc9"
border-radius: 0.5rem
btn-border-radius: 0.25rem
includes:
in_header:
<script defer data-domain="welch-lab.github.io/liger" src="https://plausible.io/js/plausible.js"></script>

reference:
- title: "Read Data"
Expand Down
Binary file modified docs/articles/img/liger_class_structure.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 4 additions & 3 deletions man/dot-ggplotLigerTheme.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/exportInteractTrack.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test_object.R
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ test_that("as.liger methods", {
expect_message(lig <- as.liger(seu))
expect_true(all.equal(sapply(datasets(lig), ncol), c(a = 150, b = 150)))

expect_in(paste0("pca.", 1:10), colnames(cellMeta(lig, as.data.frame = TRUE)))
expect_in("pca", names(dimReds(lig)))
}
})

Expand Down
2 changes: 1 addition & 1 deletion vignettes/articles/Integrating_multi_scRNA_data.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ output:
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, cache = TRUE)
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
```

This guide will demonstrate the usage of the Liger package in the style of the R Console, which can be accessed through an R development environment (e.g., RStudio) or directly from the R command line.
Expand Down
2 changes: 1 addition & 1 deletion vignettes/articles/Integrating_scRNA_and_scATAC_data.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ output:
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, cache = TRUE)
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
```

In this tutorial, we will demonstrate LIGER’s ability to jointly define cell types by leveraging multiple single-cell modalities. For example, the integration of single-cell RNA-seq and single-cell ATAC-seq enables cell type definitions that incorporate both gene expression and chromatin accessibility data. Such joint analysis allows not only the taxonomic categorization of cell types, but also a deeper understanding of their underlying regulatory networks. The pipeline for jointly analyzing scRNA-seq and scATAC-seq is similar to that for integrating multiple scRNA-seq datasets in that both rely on joint matrix factorization and quantile normalization. The main differences are: (1) scATAC-seq data needs to be processed into gene-level values; (2) gene selection is performed on the scRNA-seq data only; and (3) downstream analyses can use both gene-level and intergenic information.
Expand Down
19 changes: 8 additions & 11 deletions vignettes/articles/SNAREseq_walkthrough.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ output: html_document
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
```

Here we integrate the scATAC and scRNA reads from the dual-omics dataset SNARE-seq as an illustration of how UINMF can be used to integrate cross-modality data. For this tutorial, we will use three matrices, which can all be downloaded at https://www.dropbox.com/sh/y9kjoum8u469nj1/AADik2b2-Qo3os2QSWXdIAbna?dl=0
Here we integrate the scATAC and scRNA reads from the dual-omics dataset SNARE-seq as an illustration of how UINMF can be used to integrate cross-modality data. For this tutorial, we will use three matrices, which can all be downloaded from our [Dropbox folder](https://www.dropbox.com/scl/fo/ea666n049h762lla8c5iz/h?rlkey=796slf2ybnv2kmcxxbwluvv2b&dl=0)

- The transcriptomic measures `SNAREseq_RNA.RDS` is the SNARE-seq scRNA dataset (31,367 genes by 10,309 cells).
- For the shared epigenomic features `SNARE_seq_shared_chromatin_features.RDS`, we create a gene-centric matrix, such that we sum of the number of accessibiltiy peaks that occur over the gene body and promoter regions for each gene. For a detailed walkthough of how to generate such a matrix, please see our [Integrating scRNA and scATAC data vignette](Integrating_scRNA_and_scATAC_data.html). The resulting matrix of gene-centric chromatin accessibility is 22,379 genes by 10,309 cells
Expand All @@ -24,28 +24,27 @@ library(rliger)
rna <- readRDS("SNAREseq_data/SNAREseq_RNA.RDS")
atac_shared <- readRDS("SNAREseq_data/SNAREseq_chromatin_accessibility_shared.RDS")
atac_unshared <- readRDS("SNAREseq_data/SNARE_seq_unshared_chromatin_features.RDS")
atac_unshared <- as(atac_unshared, "CsparseMatrix")
```

## Step 2: Preprocessing

Integrative non-negative matrix factorization with unshared features (UINMF) performs factorization using shared feature matrix of all datasets and includes unshared features from dataset(s) with such information. In this tutorial, we plan to use the variable feature selected from the scRNA dataset. Since we prepared the gene-centric matrix from the scATAC dataset, these genes can be accounted as the shared features. For the unshared features, normally we select variable genes that are out of the intersection of gene sets from all datasets. In this tutorial, we directly use the peaks that are identified variable. Therefore, special processing will be needed compared to the other UINMF tutorials.

### 1. Create liger object for shared genes
### 2.1: Create liger object for shared genes

```{r create}
lig <- createLiger(list(rna = rna, atac = atac_shared))
```

### 2. Normalize and select shared variable features
### 2.2: Normalize and select shared variable features

```{r norm}
lig <- normalize(lig) %>%
selectGenes(useDatasets = "rna", thresh = 0.1) %>%
scaleNotCenter()
```

## 3. Selecting the unshared features
## Step 3: Selecting the unshared features

When selecting unshared features for the UINMF integration, it is critical to consider the type of data you are working with. For unshared features that gene-centric, the user should follow the feature selection process outlined in the ['Integrating unshared features with UINMF' tutorial](UINMF_vignette.html).

Expand Down Expand Up @@ -77,20 +76,18 @@ Add the unshared features that have been properly selected, such that they are a

```{r addunshared}
varUnsharedFeatures(lig, "atac") <- top2000
scaleUnsharedData(lig, "atac", check = FALSE) <- unshareScaled
scaleUnsharedData(lig, "atac") <- unshareScaled
```

>The latest version of rliger package is equipped with strict checkpoints on object validity, including the matching of features and cells between matrices belonging to the same dataset. Given that the special processing method we are introducing in this vignette, that we use the intergenic bins as unshared features, we have to turn the checks off with `check = FALSE` when inserting the scaled unshared features into the object.
## 4. Joint Matrix Factorization
## Step 4: Joint Matrix Factorization

To factorize the datasets and include the unshared datasets, set `useUnshared = TRUE`.

```{r factorization}
lig <- runUINMF(lig, k = 30, nIteration = 30)
```

## 5. Quantile Normalization and Joint Clustering
## Step 5: Quantile Normalization and Joint Clustering

After factorization, the resulting Liger object can be used in all downstream LIGER functions without adjustment. The default reference dataset for quantile normalization is the larger dataset, but the user should select the higher quality dataset as the reference dataset, even if it is the smaller dataset.

Expand All @@ -100,7 +97,7 @@ lig <- quantileNorm(lig)
lig <- runCluster(lig, resolution = 0.8, nNeighbors = 30)
```

## 6. Visualizations and Downstream processing
## Step 6: Visualizations and Downstream processing

```{r runumap}
lig <- runUMAP(lig, nNeighbors = 30)
Expand Down
Loading

0 comments on commit 7433de5

Please sign in to comment.