Skip to content

Commit

Permalink
Merge pull request #567 from FriederikeHanssen/vep
Browse files Browse the repository at this point in the history
Variant caller with multiple outputs causes MQC issues & interval names not resolved correctly for prefixes
  • Loading branch information
maxulysse committed Jun 8, 2022
2 parents 4ec6c48 + d18de9e commit 68687ef
Show file tree
Hide file tree
Showing 33 changed files with 181 additions and 473 deletions.
15 changes: 9 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ jobs:
- "variantcalling_channel"
- "skip_markduplicates"
- "strelka"
- "strelkabp"
- "split_fastq"
- "targeted"
- "tumor_normal_pair"
Expand Down Expand Up @@ -80,16 +81,18 @@ jobs:
if: failure()
run: |
sudo apt install bat > /dev/null
batcat --decorations=always --color=always /home/runner/pytest_workflow_*/*/log.{out,err}
batcat --decorations=always --color=always /tmp/pytest_workflow_*/*/log.{out,err}
- name: Upload logs on failure
if: failure()
uses: actions/upload-artifact@v2
with:
name: logs-${{ matrix.profile }}
path: |
/home/runner/pytest_workflow_*/*/.nextflow.log
/home/runner/pytest_workflow_*/*/work
!/home/runner/pytest_workflow_*/*/work/conda
!/home/runner/pytest_workflow_*/*/work/singularity
/home/runner/pytest_workflow_*/**/.command.log
/tmp/pytest_workflow_*/*/.nextflow.log
/tmp/pytest_workflow_*/*/log.out
/tmp/pytest_workflow_*/*/log.err
/tmp/pytest_workflow_*/*/work
!/tmp/pytest_workflow_*/*/work/conda
!/tmp/pytest_workflow_*/*/work/singularity
/tmp/pytest_workflow_*/**/.command.log
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#529](https://github.com/nf-core/sarek/pull/529) - Do not save `versions.yml` files
- [#524](https://github.com/nf-core/sarek/pull/524) - Fix intervals usage by counting the actual list of scatter/gather files produced and not overall number of intervals
- [#549](https://github.com/nf-core/sarek/pull/549) - Fix unique lanes required for Freebayes: issue [#311](https://github.com/nf-core/sarek/issues/311), replaces `meta.clone()` with actual copy of map to avoid issues with <https://nfcore.slack.com/archives/C027CM7P08M/p1644241819942339>
- [#567](https://github.com/nf-core/sarek/pull/567) - Fix interval name resolution during scatter/gather by moving the logic to `modules.config`, so that names are correctly resolved on process execution; also fix duplicate naming when variant callers produce multiple VCF files by adding a `type` field to the `meta` map

### Deprecated

Expand Down
66 changes: 34 additions & 32 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ process {

withName: 'GATK4_MARKDUPLICATES_SPARK' {
ext.args = '--remove-sequencing-duplicates false -VS LENIENT'
//TODO after step markduplicates we only deal with cram now, so this is not necessary, fix in PR that updates spark modules
ext.prefix = { !(params.skip_tools && (params.skip_tools.contains('bamqc') || params.skip_tools.contains('deeptools'))) ? "${meta.id}.md.bam" : "${meta.id}.md.cram" }
publishDir = [
mode: params.publish_dir_mode,
Expand All @@ -362,7 +363,7 @@ process {
// PREPARE_RECALIBRATION

withName: 'BASERECALIBRATOR|BASERECALIBRATOR_SPARK' {
ext.prefix = {"${meta.id}.recal"}
ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/${meta.id}/recal_table" },
Expand All @@ -384,7 +385,7 @@ process {
// RECALIBRATE

withName: 'APPLYBQSR|APPLYBQSR_SPARK' {
ext.prefix = {"${meta.id}.recal"}
ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/${meta.id}/recalibrated" },
Expand Down Expand Up @@ -494,6 +495,7 @@ process{
ext.prefix = {"${meta.id}.g"}
}
withName: 'DEEPVARIANT' {
ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}" : "${meta.id}_${intervals.simpleName}" }
ext.args = { params.wes ? "--model_type WES" : "--model_type WGS" }
ext.when = { params.tools && params.tools.contains('deepvariant') }
publishDir = [
Expand All @@ -520,7 +522,8 @@ process{
]
}
withName: 'FREEBAYES' {
ext.prefix = {"${meta.id}.freebayes"} //To make sure no naming conflicts ensue with module BCFTOOLS_SORT & the naming being correct in the output folder
// Ensures there are no naming conflicts with module BCFTOOLS_SORT and that files are named correctly in the output folder
ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.freebayes" : "${meta.id}_${target_bed.simpleName}.freebayes" }
ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1'
ext.when = { params.tools && params.tools.contains('freebayes') }
publishDir = [
Expand All @@ -529,6 +532,7 @@ process{
}

withName: 'BCFTOOLS_SORT' {
ext.prefix = { "${vcf.baseName.minus(".freebayes.vcf")}" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/${meta.id}/freebayes" },
Expand Down Expand Up @@ -557,7 +561,7 @@ process{
}
withName: 'HAPLOTYPECALLER' {
ext.args = { params.joint_germline ? "-ERC GVCF" : "" }
ext.prefix = {"${meta.id}.g"}
ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.g" : "${meta.id}_${intervals.simpleName}.g" }
ext.when = { params.tools && params.tools.contains('haplotypecaller') }
publishDir = [
enabled: params.no_intervals,
Expand Down Expand Up @@ -594,6 +598,7 @@ process{
ext.prefix = {"${meta.id}.candidate_sv"}
}
withName: 'MANTA.*' {
ext.prefix = { meta.num_intervals <= 1 ? meta.id : "${meta.id}_${target_bed.simpleName}" }
ext.args = { params.wes ? "--exome" : "" }
ext.when = { params.tools && params.tools.contains('manta') }
publishDir = [
Expand All @@ -619,6 +624,7 @@ process{
ext.prefix = {"${meta.id}.genome"}
}
withName: 'STRELKA_.*' {
ext.prefix = { meta.num_intervals <= 1 ? meta.id : "${meta.id}_${target_bed.simpleName}" }
ext.args = { params.wes ? "--exome" : "" }
ext.when = { params.tools && params.tools.contains('strelka') }
publishDir = [
Expand Down Expand Up @@ -648,7 +654,6 @@ process{
}

withName: 'CAT_MPILEUP_.*' {
ext.when = { meta.num_intervals > 1 }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/${meta.id}/controlfreec" },
Expand Down Expand Up @@ -725,6 +730,7 @@ process{
}

withName: 'MPILEUP_.*' {
ext.prefix = { meta.num_intervals <= 1 ? meta.id : "${meta.id}_${intervals.simpleName}"}
ext.when = { params.tools && params.tools.contains('controlfreec') }
publishDir = [
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -804,14 +810,6 @@ process{
]
}

withName: 'GATHERPILEUPSUMMARIES' {
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" },
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'GATHERPILEUPSUMMARIES_.*' {
publishDir = [
mode: params.publish_dir_mode,
Expand All @@ -821,6 +819,7 @@ process{
}

withName: 'GETPILEUPSUMMARIES.*' {
ext.prefix = { meta.num_intervals <= 1 ? meta.id : "${meta.id}_${intervals.simpleName}" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" },
Expand Down Expand Up @@ -848,8 +847,9 @@ process{
}

withName: 'MUTECT2'{
ext.prefix = { meta.num_intervals <= 1 ? meta.id : "${meta.id}_${intervals.simpleName}" }
ext.when = { params.tools && params.tools.contains('mutect2') }
ext.args = { params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${meta.id}.f1r2.tar.gz" : "--f1r2-tar-gz ${meta.id}.f1r2.tar.gz" }
ext.args = { params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" : "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" },
Expand Down Expand Up @@ -882,11 +882,11 @@ process{

//CONTROLFREEC
withName: 'MPILEUP_NORMAL' {
ext.prefix = { "${meta.id}.normal" }
ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.normal" : "${meta.id}_${intervals.simpleName}.normal" }
}

withName: 'MPILEUP_TUMOR' {
ext.prefix = { "${meta.id}.tumor" }
ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.tumor" : "${meta.id}_${intervals.simpleName}.tumor" }
}

withName: 'CAT_MPILEUP_NORMAL' {
Expand Down Expand Up @@ -945,14 +945,14 @@ process{
}
//FREEBAYES
withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:RUN_FREEBAYES_SOMATIC:FREEBAYES' {
ext.args = "--pooled-continuous \
--pooled-discrete \
--genotype-qualities \
--report-genotype-likelihood-max \
--allele-balance-priors-off \
--min-alternate-fraction 0.03 \
--min-repeat-entropy 1 \
--min-alternate-count 2 "
ext.args = "--pooled-continuous \
--pooled-discrete \
--genotype-qualities \
--report-genotype-likelihood-max \
--allele-balance-priors-off \
--min-alternate-fraction 0.03 \
--min-repeat-entropy 1 \
--min-alternate-count 2 "
}

//MANTA
Expand All @@ -964,8 +964,8 @@ process{

withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING:MUTECT2' {
ext.args = { params.ignore_soft_clipped_bases ?
"--dont-use-soft-clipped-bases true --f1r2-tar-gz ${meta.id}.f1r2.tar.gz --normal-sample ${meta.patient}_${meta.normal_id}" :
"--f1r2-tar-gz ${meta.id}.f1r2.tar.gz --normal-sample ${meta.patient}_${meta.normal_id}" }
"--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.patient}_${meta.normal_id}" :
"--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.patient}_${meta.normal_id}" }
}

//MSISENSORPRO
Expand All @@ -988,7 +988,7 @@ process{
// VCF QC
withName: 'BCFTOOLS_STATS'{
ext.when = { !(params.skip_tools && params.skip_tools.contains('bcftools')) }
ext.prefix = { "${meta.variantcaller}_${vcf.baseName.minus(".vcf")}" }
ext.prefix = { meta.type ? "${meta.variantcaller}_${vcf.baseName.minus(".vcf")}_${meta.type}" : "${meta.variantcaller}_${vcf.baseName.minus(".vcf")}" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/reports/bcftools" },
Expand All @@ -998,7 +998,7 @@ process{

withName: 'VCFTOOLS_.*'{
ext.when = { !(params.skip_tools && params.skip_tools.contains('vcftools')) }
ext.prefix = { "${meta.variantcaller}_${variant_file.baseName.minus(".vcf")}" }
ext.prefix = { meta.type ? "${meta.variantcaller}_${variant_file.baseName.minus(".vcf")}_${meta.type}" : "${meta.variantcaller}_${variant_file.baseName.minus(".vcf")}" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/reports/vcftools" },
Expand All @@ -1021,6 +1021,7 @@ process{
// ANNOTATE

withName: 'ENSEMBLVEP' {
ext.prefix = { meta.variantcaller ? meta.type ? "${meta.variantcaller}_${meta.id}_${meta.type}_VEP.ann.vcf" : "${meta.variantcaller}_${meta.id}_VEP.ann.vcf" : "${meta.id}_VEP.ann.vcf" }
ext.args = [
'--everything --filter_common --per_gene --total_length --offline',
(params.vep_dbnsfp && params.dbnsfp) ? '--plugin dbNSFP,dbNSFP.gz,rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF' : '',
Expand All @@ -1037,10 +1038,11 @@ process{
}

withName: ".*:ANNOTATION_MERGE:ENSEMBLVEP" {
ext.prefix = {"${meta.id}_snpEff"}
ext.prefix = { meta.variantcaller ? meta.type ? "${meta.variantcaller}_${meta.id}_${meta.type}_snpEff_VEP.ann.vcf" : "${meta.variantcaller}_${meta.id}_snpEff_VEP.ann.vcf" : "${meta.id}_snpEff_VEP.ann.vcf" }
}

withName: 'SNPEFF' {
ext.prefix = { meta.variantcaller ? meta.type ? "${meta.variantcaller}_${meta.id}_${meta.type}_snpEff.ann.vcf" : "${meta.variantcaller}_${meta.id}_snpEff.ann.vcf" : "${meta.id}_snpEff.ann.vcf" }
ext.args = '-nodownload -canon -v'
if (!params.snpeff_cache) container = { params.snpeff_genome ? "nfcore/snpeff:5.0.${params.snpeff_genome}" : "nfcore/snpeff:5.0.${params.genome}" }
publishDir = [
Expand All @@ -1060,7 +1062,7 @@ process{
}

withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_SNPEFF:TABIX_BGZIPTABIX' {
ext.prefix = {"${meta.id}_snpEff.ann.vcf"}
ext.prefix = { meta.variantcaller ? meta.type ? "${meta.variantcaller}_${meta.id}_${meta.type}_snpEff.ann.vcf" : "${meta.variantcaller}_${meta.id}_snpEff.ann.vcf" : "${meta.id}_snpEff.ann.vcf" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/annotation/${meta.id}/${meta.variantcaller}" },
Expand All @@ -1070,11 +1072,11 @@ process{
}

withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_ENSEMBLVEP:TABIX_BGZIPTABIX' {
ext.prefix = {"${meta.id}_VEP.ann.vcf"}
ext.prefix = { meta.variantcaller ? meta.type ? "${meta.variantcaller}_${meta.id}_${meta.type}_VEP.ann.vcf" : "${meta.variantcaller}_${meta.id}_VEP.ann.vcf" : "${meta.id}_VEP.ann.vcf" }
}

withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_MERGE:TABIX_BGZIPTABIX' {
ext.prefix = {"${meta.id}_snpEff_VEP.ann.vcf"}
ext.prefix = { meta.variantcaller ? meta.type ? "${meta.variantcaller}_${meta.id}_${meta.type}_snpEff_VEP.ann.vcf" : "${meta.variantcaller}_${meta.id}_snpEff_VEP.ann.vcf" : "${meta.id}_snpEff_VEP.ann.vcf" }
}

// MULTIQC
Expand Down
2 changes: 2 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ params.intervals = WorkflowMain.getGenomeAttribute(params, 'interval
params.known_indels = WorkflowMain.getGenomeAttribute(params, 'known_indels')
params.known_indels_tbi = WorkflowMain.getGenomeAttribute(params, 'known_indels_tbi')
params.mappability = WorkflowMain.getGenomeAttribute(params, 'mappability')
params.pon = WorkflowMain.getGenomeAttribute(params, 'pon')
params.pon_tbi = WorkflowMain.getGenomeAttribute(params, 'pon_tbi')
params.snpeff_db = WorkflowMain.getGenomeAttribute(params, 'snpeff_db')
params.snpeff_genome = WorkflowMain.getGenomeAttribute(params, 'snpeff_genome')
params.vep_cache_version = WorkflowMain.getGenomeAttribute(params, 'vep_cache_version')
Expand Down
5 changes: 1 addition & 4 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
"homePage": "https://github.com/nf-core/sarek",
"repos": {
"nf-core/modules": {
"ascat": {
"git_sha": "f0800157544a82ae222931764483331a81812012"
},
"bcftools/sort": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
Expand Down Expand Up @@ -235,7 +232,7 @@
"git_sha": "85ec13ff1fc2196c5a507ea497de468101baabed"
},
"untar": {
"git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9"
"git_sha": "51be617b1ca9bff973655eb899d591ed6ab253b5"
},
"vcftools": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
Expand Down
Loading

0 comments on commit 68687ef

Please sign in to comment.