Skip to content

Commit

Permalink
unifier: with preference=small, also require all alleles at a site to…
Browse files Browse the repository at this point in the history
… mutually overlap
  • Loading branch information
mlin committed Dec 7, 2019
1 parent 3f008a1 commit 611f1c8
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 5 deletions.
22 changes: 17 additions & 5 deletions src/unifier.cc
Original file line number Diff line number Diff line change
Expand Up @@ -289,18 +289,30 @@ auto prune_alleles(const unifier_config& cfg, const minimized_alleles& alleles,
for (const minimized_allele& mal : valleles) {
// find existing site(s) overlapping this allele
auto related_site = sites.end();
bool multiple_sites = false;
for (auto site = sites.begin(); site != sites.end(); site++) {
bool reject = false;
for (auto site = sites.begin(); site != sites.end() && !reject; site++) {
if (mal.first.pos.overlaps(site->first)) {
if (related_site == sites.end()) {
related_site = site;
if (cfg.preference == UnifierPreference::Small) {
// for preference=small, also require overlap with all
// alleles currently at the site
for (const auto& site_allele : site->second) {
if (!mal.first.pos.overlaps(site_allele.first.pos)) {
reject = true;
break;
}
}
}
if (!reject) {
related_site = site;
}
} else {
multiple_sites = true;
reject = true;
break;
}
}
}
if (multiple_sites) {
if (reject) {
// reject since this allele overlaps multiple existing sites
pruned.insert(mal);
continue;
Expand Down
47 changes: 47 additions & 0 deletions test/unifier.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,50 @@ TEST_CASE("unifier max_alleles_per_site") {
REQUIRE(sites[0].alleles[1].dna == "G");
}
}

TEST_CASE("unifier small preference") {
discovered_alleles dal;
discovered_allele_info dai;

dai.is_ref = true; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,100);
dal[allele(range(0, 100, 101), "A")] = dai;
dal[allele(range(0, 104, 105), "A")] = dai;
dai.is_ref = false; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,101);
dal[allele(range(0, 100, 101), "G")] = dai;
dal[allele(range(0, 104, 105), "T")] = dai;
dai.is_ref = true; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,100);
dal[allele(range(0, 100, 102), "AC")] = dai;
dai.is_ref = false; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,101);
dal[allele(range(0, 100, 102), "A")] = dai;
dai.is_ref = true; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,100);
dal[allele(range(0, 101, 104), "CTG")] = dai;
dai.is_ref = false; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,110);
dal[allele(range(0, 101, 104), "C")] = dai;
dai.is_ref = true; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,100);
dal[allele(range(0, 99, 105), "GACTGA")] = dai;
dai.is_ref = false; dai.topAQ = top_AQ(99); dai.zGQ = zygosity_by_GQ(1,0,120);
dal[allele(range(0, 99, 105), "G")] = dai;

unifier_config cfg;
cfg.preference = UnifierPreference::Small;
cfg.monoallelic_sites_for_lost_alleles = true;

vector<unified_site> sites;
unifier_stats stats;
Status s = unified_sites(cfg, 200, dal, sites, stats);

cout << s.str() << endl;
REQUIRE(s.ok());
REQUIRE(sites.size() == 4);
REQUIRE(sites[0].pos == range(0, 99, 105));
REQUIRE(sites[0].alleles.size() == 2);
REQUIRE(sites[0].monoallelic);
REQUIRE(sites[1].pos == range(0, 100, 102));
REQUIRE(sites[1].alleles.size() == 3);
REQUIRE(sites[2].pos == range(0, 101, 104));
REQUIRE(sites[2].alleles.size() == 2);
REQUIRE(sites[2].monoallelic);
REQUIRE(sites[3].pos == range(0, 104, 105));
REQUIRE(sites[3].alleles.size() == 2);
REQUIRE(!sites[3].monoallelic);
}

0 comments on commit 611f1c8

Please sign in to comment.