fix #183 and #178

kharchenkolab · Apr 12, 2024 · d6b5c58 · d6b5c58
1 parent 9588475
commit d6b5c58
Show file tree

Hide file tree

Showing 4 changed files with 37 additions and 35 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: numbat
 Title: Haplotype-Aware CNV Analysis from scRNA-Seq
 URL: https://github.com/kharchenkolab/numbat/, https://kharchenkolab.github.io/numbat/
-Version: 1.4.0
+Version: 1.4.1
 Authors@R: c(person("Teng","Gao", email="tgaoteng@gmail.com", role=c("cre", "aut")), person("Ruslan", "Soldatov", email="soldatr@mskcc.org", role="aut"), person("Hirak", "Sarkar", email="hirak_sarkar@hms.harvard.edu", role="aut"), person("Evan", "Biederstedt", email="evan.biederstedt@gmail.com", role="aut"), person("Peter", "Kharchenko", email = "peter_kharchenko@hms.harvard.edu", role = "aut"))
 Description: A computational method that infers copy number variations (CNVs) in cancer scRNA-seq data and reconstructs the tumor phylogeny. 'numbat' integrates signals from gene expression, allelic ratio, and population haplotype structures to accurately infer allele-specific CNVs in single cells and reconstruct their lineage relationship. 'numbat' can be used to: 1. detect allele-specific copy number variations from single-cells; 2. differentiate tumor versus normal cells in the tumor microenvironment; 3. infer the clonal architecture and evolutionary history of profiled tumors. 'numbat' does not require tumor/normal-paired DNA or genotype data, but operates solely on the donor scRNA-data data (for example, 10x Cell Ranger output). Additional examples and documentations are available at <https://kharchenkolab.github.io/numbat/>. For details on the method please see Gao et al. Nature Biotechnology (2022) <doi:10.1038/s41587-022-01468-y>.
 License: MIT + file LICENSE

diff --git a/R/main.R b/R/main.R
@@ -1190,9 +1190,9 @@ get_exp_likelihoods = function(exp_counts, diploid_chroms = NULL, use_loh = FALS
     if (is.null(mu) | is.null(sigma)) {
 
         if (!is.null(diploid_chroms)) {
-            exp_counts_diploid = exp_counts %>% filter(CHROM %in% diploid_chroms)
+            exp_counts_diploid = exp_counts %>% filter(!loh) %>% filter(CHROM %in% diploid_chroms)
         } else {
-            exp_counts_diploid = exp_counts %>% filter(cnv_state %in% ref_states)
+            exp_counts_diploid = exp_counts %>% filter(!loh) %>% filter(cnv_state %in% ref_states)
         }
 
         fit = exp_counts_diploid %>% {fit_lnpois_cpp(.$Y_obs, .$lambda_ref, depth_obs)}
@@ -1273,39 +1273,41 @@ get_exp_sc = function(segs_consensus, count_mat, gtf, segs_loh = NULL) {
         ) %>%
         ungroup()
 
-    if (!is.null(segs_loh)) {
-        exp_sc = exclude_loh(exp_sc, segs_loh)
-    }
+    exp_sc = exclude_loh(exp_sc, segs_loh)
 
     return(exp_sc)
 }
 
 # exclude genes in clonal LOH regions
-exclude_loh = function(exp_sc, segs_loh) {
+exclude_loh = function(exp_sc, segs_loh = NULL) {
 
-    log_message('Excluding clonal LOH regions .. ')
-
-    genes_loh = GenomicRanges::findOverlaps(
-            exp_sc %>% {GenomicRanges::GRanges(
-                seqnames = .$CHROM,
-                IRanges::IRanges(start = .$gene_start,
-                       end = .$gene_start)
-            )}, 
-            segs_loh %>% {GenomicRanges::GRanges(
-                seqnames = .$CHROM,
-                IRanges::IRanges(start = .$seg_start,
-                       end = .$seg_end)
-            )}
-        ) %>%
-        as.data.frame() %>% 
-        setNames(c('gene_index', 'seg_index')) %>%
-        left_join(
-            exp_sc %>% mutate(gene_index = 1:n()),
-            by = c('gene_index')
-        ) %>%
-        pull(gene)
-
-    exp_sc = exp_sc %>% mutate(cnv_state = ifelse(gene %in% genes_loh, 'del', cnv_state)) 
+    if (is.null(segs_loh)) {
+        exp_sc = exp_sc %>% mutate(loh = FALSE)
+    } else {
+        log_message('Excluding clonal LOH regions .. ')
+
+        genes_loh = GenomicRanges::findOverlaps(
+                exp_sc %>% {GenomicRanges::GRanges(
+                    seqnames = .$CHROM,
+                    IRanges::IRanges(start = .$gene_start,
+                        end = .$gene_start)
+                )}, 
+                segs_loh %>% {GenomicRanges::GRanges(
+                    seqnames = .$CHROM,
+                    IRanges::IRanges(start = .$seg_start,
+                        end = .$seg_end)
+                )}
+            ) %>%
+            as.data.frame() %>% 
+            setNames(c('gene_index', 'seg_index')) %>%
+            left_join(
+                exp_sc %>% mutate(gene_index = 1:n()),
+                by = c('gene_index')
+            ) %>%
+            pull(gene)
+
+        exp_sc = exp_sc %>% mutate(loh = gene %in% genes_loh) 
+    }
 
     return(exp_sc)
 
@@ -1323,7 +1325,7 @@ get_exp_post = function(segs_consensus, count_mat, gtf, lambdas_ref, sc_refs = N
     exp_sc = get_exp_sc(segs_consensus, count_mat, gtf, segs_loh)
 
     if (is.null(use_loh)) {
-        if (mean(exp_sc$cnv_state == 'neu') < 0.05) {
+        if (mean(exp_sc$cnv_state == 'neu' & (!exp_sc$loh)) < 0.05) {
             use_loh = TRUE
             log_message('less than 5% genes are in neutral region - including LOH in baseline')
         } else {
@@ -1346,7 +1348,7 @@ get_exp_post = function(segs_consensus, count_mat, gtf, lambdas_ref, sc_refs = N
 
             ref = sc_refs[cell]
 
-            exp_sc = exp_sc[,c('gene', 'seg', 'CHROM', 'cnv_state', 'seg_start', 'seg_end', cell)] %>%
+            exp_sc = exp_sc[,c('gene', 'seg', 'CHROM', 'cnv_state', 'loh', 'seg_start', 'seg_end', cell)] %>%
                 rename(Y_obs = ncol(.))
 
             exp_sc %>%
@@ -1712,7 +1714,7 @@ test_multi_allelic = function(bulks, segs_consensus, min_LLR = 5, p_min = 0.999)
     } else {
         segs_consensus = segs_consensus %>% 
             mutate(
-                n_states = ifelse(cnv_state == 'neu', 1, 0),
+                n_states = ifelse(cnv_state == 'neu', 0, 1),
                 cnv_states = cnv_state
             )
     }

diff --git a/R/utils.R b/R/utils.R
@@ -1027,7 +1027,7 @@ smooth_segs = function(bulk, min_genes = 10) {
     return(bulk)
 }
 
-#' Annotate a consensus segments on a pseudobulk dataframe
+#' Annotate a set of segments on a pseudobulk dataframe
 #' @param bulk dataframe Pseudobulk profile
 #' @param segs_consensus datatframe Consensus segment dataframe
 #' @return dataframe Pseudobulk profile

diff --git a/R/vis.R b/R/vis.R
@@ -505,7 +505,7 @@ plot_phylo_heatmap = function(
     # if no multi allelic CNVs
     if (!'n_states' %in% colnames(joint_post)) {
         joint_post = joint_post %>% mutate(
-            n_states = ifelse(cnv_state == 'neu', 1, 0),
+            n_states = ifelse(cnv_state == 'neu', 0, 1),
             cnv_states = cnv_state
         )
     } else {