Skip to content

Commit

Permalink
fix #183 and #178
Browse files Browse the repository at this point in the history
  • Loading branch information
teng-gao committed Apr 12, 2024
1 parent 9588475 commit d6b5c58
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 35 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: numbat
Title: Haplotype-Aware CNV Analysis from scRNA-Seq
URL: https://github.com/kharchenkolab/numbat/, https://kharchenkolab.github.io/numbat/
Version: 1.4.0
Version: 1.4.1
Authors@R: c(person("Teng","Gao", email="tgaoteng@gmail.com", role=c("cre", "aut")), person("Ruslan", "Soldatov", email="soldatr@mskcc.org", role="aut"), person("Hirak", "Sarkar", email="hirak_sarkar@hms.harvard.edu", role="aut"), person("Evan", "Biederstedt", email="evan.biederstedt@gmail.com", role="aut"), person("Peter", "Kharchenko", email = "peter_kharchenko@hms.harvard.edu", role = "aut"))
Description: A computational method that infers copy number variations (CNVs) in cancer scRNA-seq data and reconstructs the tumor phylogeny. 'numbat' integrates signals from gene expression, allelic ratio, and population haplotype structures to accurately infer allele-specific CNVs in single cells and reconstruct their lineage relationship. 'numbat' can be used to: 1. detect allele-specific copy number variations from single-cells; 2. differentiate tumor versus normal cells in the tumor microenvironment; 3. infer the clonal architecture and evolutionary history of profiled tumors. 'numbat' does not require tumor/normal-paired DNA or genotype data, but operates solely on the donor scRNA-data data (for example, 10x Cell Ranger output). Additional examples and documentations are available at <https://kharchenkolab.github.io/numbat/>. For details on the method please see Gao et al. Nature Biotechnology (2022) <doi:10.1038/s41587-022-01468-y>.
License: MIT + file LICENSE
Expand Down
66 changes: 34 additions & 32 deletions R/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -1190,9 +1190,9 @@ get_exp_likelihoods = function(exp_counts, diploid_chroms = NULL, use_loh = FALS
if (is.null(mu) | is.null(sigma)) {

if (!is.null(diploid_chroms)) {
exp_counts_diploid = exp_counts %>% filter(CHROM %in% diploid_chroms)
exp_counts_diploid = exp_counts %>% filter(!loh) %>% filter(CHROM %in% diploid_chroms)
} else {
exp_counts_diploid = exp_counts %>% filter(cnv_state %in% ref_states)
exp_counts_diploid = exp_counts %>% filter(!loh) %>% filter(cnv_state %in% ref_states)
}

fit = exp_counts_diploid %>% {fit_lnpois_cpp(.$Y_obs, .$lambda_ref, depth_obs)}
Expand Down Expand Up @@ -1273,39 +1273,41 @@ get_exp_sc = function(segs_consensus, count_mat, gtf, segs_loh = NULL) {
) %>%
ungroup()

if (!is.null(segs_loh)) {
exp_sc = exclude_loh(exp_sc, segs_loh)
}
exp_sc = exclude_loh(exp_sc, segs_loh)

return(exp_sc)
}

# exclude genes in clonal LOH regions
exclude_loh = function(exp_sc, segs_loh) {
exclude_loh = function(exp_sc, segs_loh = NULL) {

log_message('Excluding clonal LOH regions .. ')

genes_loh = GenomicRanges::findOverlaps(
exp_sc %>% {GenomicRanges::GRanges(
seqnames = .$CHROM,
IRanges::IRanges(start = .$gene_start,
end = .$gene_start)
)},
segs_loh %>% {GenomicRanges::GRanges(
seqnames = .$CHROM,
IRanges::IRanges(start = .$seg_start,
end = .$seg_end)
)}
) %>%
as.data.frame() %>%
setNames(c('gene_index', 'seg_index')) %>%
left_join(
exp_sc %>% mutate(gene_index = 1:n()),
by = c('gene_index')
) %>%
pull(gene)

exp_sc = exp_sc %>% mutate(cnv_state = ifelse(gene %in% genes_loh, 'del', cnv_state))
if (is.null(segs_loh)) {
exp_sc = exp_sc %>% mutate(loh = FALSE)
} else {
log_message('Excluding clonal LOH regions .. ')

genes_loh = GenomicRanges::findOverlaps(
exp_sc %>% {GenomicRanges::GRanges(
seqnames = .$CHROM,
IRanges::IRanges(start = .$gene_start,
end = .$gene_start)
)},
segs_loh %>% {GenomicRanges::GRanges(
seqnames = .$CHROM,
IRanges::IRanges(start = .$seg_start,
end = .$seg_end)
)}
) %>%
as.data.frame() %>%
setNames(c('gene_index', 'seg_index')) %>%
left_join(
exp_sc %>% mutate(gene_index = 1:n()),
by = c('gene_index')
) %>%
pull(gene)

exp_sc = exp_sc %>% mutate(loh = gene %in% genes_loh)
}

return(exp_sc)

Expand All @@ -1323,7 +1325,7 @@ get_exp_post = function(segs_consensus, count_mat, gtf, lambdas_ref, sc_refs = N
exp_sc = get_exp_sc(segs_consensus, count_mat, gtf, segs_loh)

if (is.null(use_loh)) {
if (mean(exp_sc$cnv_state == 'neu') < 0.05) {
if (mean(exp_sc$cnv_state == 'neu' & (!exp_sc$loh)) < 0.05) {
use_loh = TRUE
log_message('less than 5% genes are in neutral region - including LOH in baseline')
} else {
Expand All @@ -1346,7 +1348,7 @@ get_exp_post = function(segs_consensus, count_mat, gtf, lambdas_ref, sc_refs = N

ref = sc_refs[cell]

exp_sc = exp_sc[,c('gene', 'seg', 'CHROM', 'cnv_state', 'seg_start', 'seg_end', cell)] %>%
exp_sc = exp_sc[,c('gene', 'seg', 'CHROM', 'cnv_state', 'loh', 'seg_start', 'seg_end', cell)] %>%
rename(Y_obs = ncol(.))

exp_sc %>%
Expand Down Expand Up @@ -1712,7 +1714,7 @@ test_multi_allelic = function(bulks, segs_consensus, min_LLR = 5, p_min = 0.999)
} else {
segs_consensus = segs_consensus %>%
mutate(
n_states = ifelse(cnv_state == 'neu', 1, 0),
n_states = ifelse(cnv_state == 'neu', 0, 1),
cnv_states = cnv_state
)
}
Expand Down
2 changes: 1 addition & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -1027,7 +1027,7 @@ smooth_segs = function(bulk, min_genes = 10) {
return(bulk)
}

#' Annotate a consensus segments on a pseudobulk dataframe
#' Annotate a set of segments on a pseudobulk dataframe
#' @param bulk dataframe Pseudobulk profile
#' @param segs_consensus datatframe Consensus segment dataframe
#' @return dataframe Pseudobulk profile
Expand Down
2 changes: 1 addition & 1 deletion R/vis.R
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ plot_phylo_heatmap = function(
# if no multi allelic CNVs
if (!'n_states' %in% colnames(joint_post)) {
joint_post = joint_post %>% mutate(
n_states = ifelse(cnv_state == 'neu', 1, 0),
n_states = ifelse(cnv_state == 'neu', 0, 1),
cnv_states = cnv_state
)
} else {
Expand Down

0 comments on commit d6b5c58

Please sign in to comment.