Skip to content

Commit

Permalink
Merge pull request #641 from FriederikeHanssen/estimatelib
Browse files Browse the repository at this point in the history
Fix duplicate substrings in tools and skip_tools
  • Loading branch information
maxulysse committed Jul 17, 2022
2 parents d2388d1 + 211e62d commit 28aa979
Show file tree
Hide file tree
Showing 9 changed files with 176 additions and 92 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ jobs:
- "save_bam_mapped"
- "save_output_as_bam"
- "skip_markduplicates"
- "skip_qc"
- "split_fastq"
- "strelka"
- "strelkabp"
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#618](https://github.com/nf-core/sarek/pull/618) - Fix issue with tiddit [#621](https://github.com/nf-core/sarek/issues/621)
- [#618](https://github.com/nf-core/sarek/pull/618) - Fix channel issue with `targets.bed` in prepare_intervals
- [#634](https://github.com/nf-core/sarek/pull/634) - Fix issue with samtools/mosdepth plots in multiqc_report
- [#641](https://github.com/nf-core/sarek/pull/641) - Fix issue with tool names being matched as substrings of other entries in `tools` and `skip_tools`

### Deprecated

Expand Down
92 changes: 46 additions & 46 deletions conf/modules.config

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions subworkflows/local/annotate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,23 @@ workflow ANNOTATE {
vep_cache_version
vep_cache
vep_extra_files

main:
ch_reports = Channel.empty()
ch_vcf_ann = Channel.empty()
ch_tab_ann = Channel.empty()
ch_json_ann = Channel.empty()
ch_versions = Channel.empty()

if (tools.contains('merge') || tools.contains('snpeff')) {
if (tools.split(',').contains('merge') || tools.split(',').contains('snpeff')) {
ANNOTATION_SNPEFF(vcf, snpeff_db, snpeff_cache)

ch_reports = ch_reports.mix(ANNOTATION_SNPEFF.out.reports)
ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_SNPEFF.out.vcf_tbi)
ch_versions = ch_versions.mix(ANNOTATION_SNPEFF.out.versions.first())
}

if (tools.contains('merge')) {
if (tools.split(',').contains('merge')) {
vcf_ann_for_merge = ANNOTATION_SNPEFF.out.vcf_tbi.map{ meta, vcf, tbi -> [meta, vcf] }
ANNOTATION_MERGE(vcf_ann_for_merge, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files)

Expand All @@ -43,7 +43,7 @@ workflow ANNOTATE {
ch_versions = ch_versions.mix(ANNOTATION_MERGE.out.versions.first())
}

if (tools.contains('vep')) {
if (tools.split(',').contains('vep')) {
ANNOTATION_ENSEMBLVEP(vcf, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files)

ch_reports = ch_reports.mix(ANNOTATION_ENSEMBLVEP.out.reports)
Expand Down
17 changes: 9 additions & 8 deletions subworkflows/local/germline_variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ workflow GERMLINE_VARIANT_CALLING {
strelka_vcf = Channel.empty()
tiddit_vcf = Channel.empty()

println tools
// Remap channel with intervals
cram_recalibrated_intervals = cram_recalibrated.combine(intervals)
.map{ meta, cram, crai, intervals, num_intervals ->
Expand All @@ -65,7 +66,7 @@ workflow GERMLINE_VARIANT_CALLING {
cram, crai, bed_new, tbi_new]
}

if(params.tools.contains('mpileup')){
if(tools.split(',').contains('mpileup')){
cram_intervals_no_index = cram_recalibrated_intervals
.map { meta, cram, crai, intervals ->
[meta, cram, intervals]
Expand All @@ -79,7 +80,7 @@ workflow GERMLINE_VARIANT_CALLING {

// CNVKIT

if(tools.contains('cnvkit')){
if(tools.split(',').contains('cnvkit')){
cram_recalibrated_cnvkit_germline = cram_recalibrated
.map{ meta, cram, crai ->
[meta, [], cram]
Expand All @@ -94,15 +95,15 @@ workflow GERMLINE_VARIANT_CALLING {
}

// DEEPVARIANT
if(tools.contains('deepvariant')){
if(tools.split(',').contains('deepvariant')){
RUN_DEEPVARIANT(cram_recalibrated_intervals, dict, fasta, fasta_fai)

deepvariant_vcf = Channel.empty().mix(RUN_DEEPVARIANT.out.deepvariant_vcf,RUN_DEEPVARIANT.out.deepvariant_gvcf)
ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions)
}

// FREEBAYES
if (tools.contains('freebayes')){
if (tools.split(',').contains('freebayes')){
// Remap channel for Freebayes
cram_recalibrated_intervals_freebayes = cram_recalibrated_intervals
.map{ meta, cram, crai, intervals ->
Expand All @@ -115,7 +116,7 @@ workflow GERMLINE_VARIANT_CALLING {
}

// HAPLOTYPECALLER
if (tools.contains('haplotypecaller')){
if (tools.split(',').contains('haplotypecaller')){
RUN_HAPLOTYPECALLER(cram_recalibrated_intervals,
fasta,
fasta_fai,
Expand All @@ -131,7 +132,7 @@ workflow GERMLINE_VARIANT_CALLING {
}

// MANTA
if (tools.contains('manta')){
if (tools.split(',').contains('manta')){
RUN_MANTA_GERMLINE (cram_recalibrated_intervals_gz_tbi,
dict,
fasta,
Expand All @@ -142,7 +143,7 @@ workflow GERMLINE_VARIANT_CALLING {
}

// STRELKA
if (tools.contains('strelka')){
if (tools.split(',').contains('strelka')){
RUN_STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi,
dict,
fasta,
Expand All @@ -153,7 +154,7 @@ workflow GERMLINE_VARIANT_CALLING {
}

//TIDDIT
if (tools.contains('tiddit')){
if (tools.split(',').contains('tiddit')){
RUN_TIDDIT(cram_recalibrated,
fasta,
bwa)
Expand Down
20 changes: 10 additions & 10 deletions subworkflows/local/pair_variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ workflow PAIR_VARIANT_CALLING {

}

if (tools.contains('ascat')){
if (tools.split(',').contains('ascat')){

RUN_ASCAT_SOMATIC( cram_pair,
allele_files,
Expand All @@ -89,7 +89,7 @@ workflow PAIR_VARIANT_CALLING {

}

if (tools.contains('controlfreec')){
if (tools.split(',').contains('controlfreec')){
cram_normal_intervals_no_index = cram_pair_intervals
.map {meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals ->
[meta, normal_cram, intervals]
Expand Down Expand Up @@ -124,7 +124,7 @@ workflow PAIR_VARIANT_CALLING {
ch_versions = ch_versions.mix(RUN_CONTROLFREEC_SOMATIC.out.versions)
}

if (tools.contains('cnvkit')){
if (tools.split(',').contains('cnvkit')){
cram_pair_cnvkit_somatic = cram_pair
.map{meta, normal_cram, normal_crai, tumor_cram, tumor_crai ->
[meta, tumor_cram, normal_cram]
Expand All @@ -137,14 +137,14 @@ workflow PAIR_VARIANT_CALLING {
[])
}

if (tools.contains('freebayes')){
if (tools.split(',').contains('freebayes')){
RUN_FREEBAYES_SOMATIC(cram_pair_intervals, dict, fasta, fasta_fai)

freebayes_vcf = RUN_FREEBAYES_SOMATIC.out.freebayes_vcf
ch_versions = ch_versions.mix(RUN_FREEBAYES_SOMATIC.out.versions)
}

if (tools.contains('manta')) {
if (tools.split(',').contains('manta')) {
RUN_MANTA_SOMATIC( cram_pair_intervals_gz_tbi,
dict,
fasta,
Expand All @@ -156,9 +156,9 @@ workflow PAIR_VARIANT_CALLING {
ch_versions = ch_versions.mix(RUN_MANTA_SOMATIC.out.versions)
}

if (tools.contains('strelka')) {
if (tools.split(',').contains('strelka')) {

if (tools.contains('manta')) {
if (tools.split(',').contains('manta')) {
cram_pair_strelka = cram_pair.join(manta_candidate_small_indels_vcf)
.join(manta_candidate_small_indels_vcf_tbi)
.combine(intervals_bed_gz_tbi)
Expand Down Expand Up @@ -188,15 +188,15 @@ workflow PAIR_VARIANT_CALLING {
ch_versions = ch_versions.mix(RUN_STRELKA_SOMATIC.out.versions)
}

if (tools.contains('msisensorpro')) {
if (tools.split(',').contains('msisensorpro')) {

cram_pair_msisensor = cram_pair.combine(intervals_bed_combined)
MSISENSORPRO_MSI_SOMATIC(cram_pair_msisensor, fasta, msisensorpro_scan)
ch_versions = ch_versions.mix(MSISENSORPRO_MSI_SOMATIC.out.versions)
msisensorpro_output = msisensorpro_output.mix(MSISENSORPRO_MSI_SOMATIC.out.output_report)
}

if (tools.contains('mutect2')) {
if (tools.split(',').contains('mutect2')) {
cram_pair_mutect2 = cram_pair_intervals.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals ->
[meta, [normal_cram, tumor_cram], [normal_crai, tumor_crai], intervals]
}
Expand All @@ -216,7 +216,7 @@ workflow PAIR_VARIANT_CALLING {
}

//TIDDIT
if (tools.contains('tiddit')){
if (tools.split(',').contains('tiddit')){
cram_normal = cram_pair.map{meta, normal_cram, normal_crai, tumor_cram, tumor_crai ->
[meta, normal_cram, normal_crai]
}
Expand Down
16 changes: 8 additions & 8 deletions subworkflows/local/tumor_variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
cram, crai, bed_new, tbi_new]
}

if (tools.contains('mpileup') || tools.contains('controlfreec')){
if (tools.split(',').contains('mpileup') || tools.split(',').contains('controlfreec')){
cram_intervals_no_index = cram_recalibrated_intervals.map { meta, cram, crai, intervals ->
[meta, cram, intervals]
}
Expand All @@ -75,7 +75,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
ch_versions = ch_versions.mix(RUN_MPILEUP.out.versions)
}

if (tools.contains('controlfreec')){
if (tools.split(',').contains('controlfreec')){
controlfreec_input = RUN_MPILEUP.out.mpileup
.map{ meta, pileup_tumor ->
[meta, [], pileup_tumor, [], [], [], []]
Expand All @@ -93,7 +93,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
ch_versions = ch_versions.mix(RUN_CONTROLFREEC_TUMORONLY.out.versions)
}

if(tools.contains('cnvkit')){
if(tools.split(',').contains('cnvkit')){
cram_recalibrated_cnvkit_tumoronly = cram_recalibrated
.map{ meta, cram, crai ->
[meta, cram, []]
Expand All @@ -108,7 +108,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
ch_versions = ch_versions.mix(RUN_CNVKIT_TUMORONLY.out.versions)
}

if (tools.contains('freebayes')){
if (tools.split(',').contains('freebayes')){
// Remap channel for Freebayes
cram_recalibrated_intervals_freebayes = cram_recalibrated_intervals
.map{ meta, cram, crai, intervals ->
Expand All @@ -121,7 +121,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions)
}

if (tools.contains('mutect2')) {
if (tools.split(',').contains('mutect2')) {
GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING(cram_recalibrated_intervals,
fasta,
fasta_fai,
Expand All @@ -135,7 +135,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
ch_versions = ch_versions.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.versions)
}

if (tools.contains('manta')){
if (tools.split(',').contains('manta')){
RUN_MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi,
dict,
fasta,
Expand All @@ -145,7 +145,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
ch_versions = ch_versions.mix(RUN_MANTA_TUMORONLY.out.versions)
}

if (tools.contains('strelka')) {
if (tools.split(',').contains('strelka')) {
RUN_STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi,
dict,
fasta,
Expand All @@ -156,7 +156,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
}

//TIDDIT
if (tools.contains('tiddit')){
if (tools.split(',').contains('tiddit')){
RUN_TIDDIT(cram_recalibrated,
fasta,
bwa)
Expand Down
81 changes: 81 additions & 0 deletions tests/test_skip_qc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Test case: run the pipeline with the `test,docker` profiles while passing every
# QC-related entry to --skip_tools (fastqc, markduplicates_report, mosdepth,
# multiqc, samtools).
- name: Run default pipeline with skipping all QC steps
command: nextflow run main.nf -profile test,docker --skip_tools 'fastqc,markduplicates_report,mosdepth,multiqc,samtools'
tags:
# NOTE(review): `skip_qc` appears to be the tag the CI workflow matrix selects on — confirm against .github/workflows/ci.yml
- skip_qc
- preprocessing
files:
# MultiQC is skipped, so its output directory must be absent.
- path: results/multiqc
should_exist: false
# Entries without `should_exist` are presumably required to exist
# (test-runner default) — TODO confirm. These are the preprocessing outputs
# (CRAM/CRAI, recalibration table, CSV sheets).
- path: results/preprocessing/test/markduplicates/test.md.cram
- path: results/preprocessing/test/markduplicates/test.md.cram.crai
- path: results/preprocessing/test/recal_table/test.recal.table
- path: results/preprocessing/test/recalibrated/test.recal.cram
- path: results/preprocessing/test/recalibrated/test.recal.cram.crai
- path: results/csv/markduplicates.csv
- path: results/csv/markduplicates_no_table.csv
- path: results/csv/recalibrated.csv
# Everything below is a report path that must NOT exist when its tool is skipped.
# fastqc
- path: results/reports/fastqc/test-test_L1
should_exist: false
# markduplicates_report
- path: results/reports/markduplicates/test/test.md.metrics
should_exist: false
# mosdepth (markduplicates and recalibrated CRAMs)
- path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt
should_exist: false
- path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt
should_exist: false
- path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt
should_exist: false
- path: results/reports/mosdepth/test/test.md.regions.bed.gz
should_exist: false
- path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt
should_exist: false
- path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt
should_exist: false
- path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt
should_exist: false
- path: results/reports/mosdepth/test/test.recal.regions.bed.gz
should_exist: false
# samtools stats
- path: results/reports/samtools_stats/test/test.md.cram.stats
should_exist: false
- path: results/reports/samtools_stats/test/test.recal.cram.stats
should_exist: false

# Test case: same --skip_tools list and file expectations as the default-profile
# test, but the command additionally enables the `use_gatk_spark` profile.
- name: Run spark pipeline with skipping all QC steps
command: nextflow run main.nf -profile test,use_gatk_spark,docker --skip_tools 'fastqc,markduplicates_report,mosdepth,multiqc,samtools'
tags:
- skip_qc
- preprocessing
files:
# MultiQC is skipped, so its output directory must be absent.
- path: results/multiqc
should_exist: false
# Entries without `should_exist` are presumably required to exist
# (test-runner default) — TODO confirm.
- path: results/preprocessing/test/markduplicates/test.md.cram
- path: results/preprocessing/test/markduplicates/test.md.cram.crai
- path: results/preprocessing/test/recal_table/test.recal.table
- path: results/preprocessing/test/recalibrated/test.recal.cram
- path: results/preprocessing/test/recalibrated/test.recal.cram.crai
- path: results/csv/markduplicates.csv
- path: results/csv/markduplicates_no_table.csv
- path: results/csv/recalibrated.csv
# Report paths that must NOT exist because the producing tool is skipped.
# fastqc
- path: results/reports/fastqc/test-test_L1
should_exist: false
# markduplicates_report
- path: results/reports/markduplicates/test/test.md.metrics
should_exist: false
# mosdepth (markduplicates and recalibrated CRAMs)
- path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt
should_exist: false
- path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt
should_exist: false
- path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt
should_exist: false
- path: results/reports/mosdepth/test/test.md.regions.bed.gz
should_exist: false
- path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt
should_exist: false
- path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt
should_exist: false
- path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt
should_exist: false
- path: results/reports/mosdepth/test/test.recal.regions.bed.gz
should_exist: false
# samtools stats
- path: results/reports/samtools_stats/test/test.md.cram.stats
should_exist: false
- path: results/reports/samtools_stats/test/test.recal.cram.stats
should_exist: false
Loading

0 comments on commit 28aa979

Please sign in to comment.