From 611f1c86892ab3a856a52b1e23190750bae4580c Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Sat, 7 Dec 2019 12:33:52 -1000 Subject: [PATCH] unifier: with preference=small, also require all alleles at a site to mutually overlap --- src/unifier.cc | 22 +++++++++++++++++----- test/unifier.cc | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 5 deletions(-) diff --git a/src/unifier.cc b/src/unifier.cc index 6db20590..fc72a27d 100644 --- a/src/unifier.cc +++ b/src/unifier.cc @@ -289,18 +289,30 @@ auto prune_alleles(const unifier_config& cfg, const minimized_alleles& alleles, for (const minimized_allele& mal : valleles) { // find existing site(s) overlapping this allele auto related_site = sites.end(); - bool multiple_sites = false; - for (auto site = sites.begin(); site != sites.end(); site++) { + bool reject = false; + for (auto site = sites.begin(); site != sites.end() && !reject; site++) { if (mal.first.pos.overlaps(site->first)) { if (related_site == sites.end()) { - related_site = site; + if (cfg.preference == UnifierPreference::Small) { + // for preference=small, also require overlap with all + // alleles currently at the site + for (const auto& site_allele : site->second) { + if (!mal.first.pos.overlaps(site_allele.first.pos)) { + reject = true; + break; + } + } + } + if (!reject) { + related_site = site; + } } else { - multiple_sites = true; + reject = true; break; } } } - if (multiple_sites) { + if (reject) { // reject since this allele overlaps multiple existing sites pruned.insert(mal); continue; diff --git a/test/unifier.cc b/test/unifier.cc index 9ded85b0..7f91eb01 100644 --- a/test/unifier.cc +++ b/test/unifier.cc @@ -78,3 +78,50 @@ TEST_CASE("unifier max_alleles_per_site") { REQUIRE(sites[0].alleles[1].dna == "G"); } } + +TEST_CASE("unifier small preference") { + discovered_alleles dal; + discovered_allele_info dai; + + dai.is_ref = true; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,100); + dal[allele(range(0, 100, 101), "A")] = dai; + dal[allele(range(0, 104, 105), "A")] = dai; + dai.is_ref = false; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,101); + dal[allele(range(0, 100, 101), "G")] = dai; + dal[allele(range(0, 104, 105), "T")] = dai; + dai.is_ref = true; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,100); + dal[allele(range(0, 100, 102), "AC")] = dai; + dai.is_ref = false; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,101); + dal[allele(range(0, 100, 102), "A")] = dai; + dai.is_ref = true; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,100); + dal[allele(range(0, 101, 104), "CTG")] = dai; + dai.is_ref = false; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,110); + dal[allele(range(0, 101, 104), "C")] = dai; + dai.is_ref = true; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,100); + dal[allele(range(0, 99, 105), "GACTGA")] = dai; + dai.is_ref = false; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,120); + dal[allele(range(0, 99, 105), "G")] = dai; + + unifier_config cfg; + cfg.preference = UnifierPreference::Small; + cfg.monoallelic_sites_for_lost_alleles = true; + + vector sites; + unifier_stats stats; + Status s = unified_sites(cfg, 200, dal, sites, stats); + + cout << s.str() << endl; + REQUIRE(s.ok()); + REQUIRE(sites.size() == 4); + REQUIRE(sites[0].pos == range(0, 99, 105)); + REQUIRE(sites[0].alleles.size() == 2); + REQUIRE(sites[0].monoallelic); + REQUIRE(sites[1].pos == range(0, 100, 102)); + REQUIRE(sites[1].alleles.size() == 3); + REQUIRE(sites[2].pos == range(0, 101, 104)); + REQUIRE(sites[2].alleles.size() == 2); + REQUIRE(sites[2].monoallelic); + REQUIRE(sites[3].pos == range(0, 104, 105)); + REQUIRE(sites[3].alleles.size() == 2); + REQUIRE(!sites[3].monoallelic); +}