From 573ab99a85679341ca51fed6b0868d983c9bc2f1 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Wed, 8 Jun 2022 18:15:55 +0200 Subject: [PATCH 01/12] WIP: Adding option for setting file-output-format for VEP --- conf/modules.config | 14 +++++++++++--- modules/nf-core/modules/ensemblvep/main.nf | 16 ++++++++++------ nextflow.config | 1 + subworkflows/local/annotate.nf | 19 ++++++++++++------- .../nf-core/annotation/ensemblvep/main.nf | 5 ++++- workflows/sarek.nf | 1 + 6 files changed, 39 insertions(+), 17 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 7a4e0b055d..e8c0f4ede2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1028,11 +1028,19 @@ process{ (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? '--plugin SpliceAI,snv=spliceai_scores.raw.snv.hg38.vcf.gz,indel=spliceai_scores.raw.indel.hg38.vcf.gz' : '', (params.vep_spliceregion) ? '--plugin SpliceRegion' : '' ].join(' ').trim() + ext.suffix = '_VEP' if (!params.vep_cache) container = { params.vep_genome ? 
"nfcore/vep:104.3.${params.vep_genome}" : "nfcore/vep:104.3.${params.genome}" } publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/EnsemblVEP/${meta.id}/${meta.variantcaller}" }, - pattern: "*html" + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/EnsemblVEP/${meta.id}/${meta.variantcaller}" }, + pattern: "*html" + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.id}/${meta.variantcaller}" }, + pattern: "*{json,tab}" + ] ] } diff --git a/modules/nf-core/modules/ensemblvep/main.nf b/modules/nf-core/modules/ensemblvep/main.nf index a5a9b1abcc..9847182ec1 100644 --- a/modules/nf-core/modules/ensemblvep/main.nf +++ b/modules/nf-core/modules/ensemblvep/main.nf @@ -9,6 +9,7 @@ process ENSEMBLVEP { input: tuple val(meta), path(vcf) + val vep_output val genome val species val cache_version @@ -16,15 +17,18 @@ process ENSEMBLVEP { path extra_files output: - tuple val(meta), path("*.ann.vcf"), emit: vcf - path "*.summary.html" , emit: report - path "versions.yml" , emit: versions + tuple val(meta), path("*.ann.vcf") , optional:true, emit: vcf + tuple val(meta), path("*.ann.tab") , optional:true, emit: tab + tuple val(meta), path("*.ann.json") , optional:true, emit: json + path "*.summary.html" , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + def suffix = task.ext.suffix ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? 
"\${PWD}/${cache}" : "/.vep" """ @@ -32,7 +36,7 @@ process ENSEMBLVEP { vep \\ -i $vcf \\ - -o ${prefix}.ann.vcf \\ + -o ${prefix}${suffix}.ann.$vep_output \\ $args \\ --assembly $genome \\ --species $species \\ @@ -40,8 +44,8 @@ process ENSEMBLVEP { --cache_version $cache_version \\ --dir_cache $dir_cache \\ --fork $task.cpus \\ - --vcf \\ - --stats_file ${prefix}.summary.html + --stats_file ${prefix}.summary.html \\ + --$vep_output rm -rf $prefix diff --git a/nextflow.config b/nextflow.config index 3c98b3ebb8..043a171083 100644 --- a/nextflow.config +++ b/nextflow.config @@ -69,6 +69,7 @@ params { joint_germline = false // // Annotation + vep_output = 'vcf' vep_dbnsfp = null // dbnsfp plugin disabled within VEP dbnsfp = null // No dbnsfp processed file dbnsfp_tbi = null // No dbnsfp processed file index diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf index a9c386b011..fd2a9a7d5a 100644 --- a/subworkflows/local/annotate.nf +++ b/subworkflows/local/annotate.nf @@ -12,6 +12,7 @@ workflow ANNOTATE { tools snpeff_db snpeff_cache + vep_output vep_genome vep_species vep_cache_version @@ -41,15 +42,19 @@ workflow ANNOTATE { } if (tools.contains('vep')) { - ANNOTATION_ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + ANNOTATION_ENSEMBLVEP(vcf, vep_output, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) - ch_reports = ch_reports.mix(ANNOTATION_ENSEMBLVEP.out.reports) - ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_ENSEMBLVEP.out.vcf_tbi) - ch_versions = ch_versions.mix(ANNOTATION_ENSEMBLVEP.out.versions.first()) + ch_reports = ch_reports.mix(ANNOTATION_ENSEMBLVEP.out.reports) + ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_ENSEMBLVEP.out.vcf_tbi) + ch_tab_ann = ch_tab_ann.mix(ANNOTATION_ENSEMBLVEP.out.tab) + ch_json_ann = ch_json_ann.mix(ANNOTATION_ENSEMBLVEP.out.json) + ch_versions = ch_versions.mix(ANNOTATION_ENSEMBLVEP.out.versions.first()) } emit: - vcf_ann = ch_vcf_ann // channel: [ 
val(meta), vcf.gz, vcf.gz.tbi ] - reports = ch_reports // path: *.html - versions = ch_versions // path: versions.yml + vcf_ann = ch_vcf_ann // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] + tab_ann = ch_tab_ann + json_ann = ch_json_ann + reports = ch_reports // path: *.html + versions = ch_versions // path: versions.yml } diff --git a/subworkflows/nf-core/annotation/ensemblvep/main.nf b/subworkflows/nf-core/annotation/ensemblvep/main.nf index 4c7d0e3e69..8d1cc849b6 100644 --- a/subworkflows/nf-core/annotation/ensemblvep/main.nf +++ b/subworkflows/nf-core/annotation/ensemblvep/main.nf @@ -8,6 +8,7 @@ include { TABIX_BGZIPTABIX } from '../../../../modules/nf-core/modules/tabix/bgz workflow ANNOTATION_ENSEMBLVEP { take: vcf // channel: [ val(meta), vcf ] + vep_output vep_genome // value: genome to use vep_species // value: species to use vep_cache_version // value: cache version to use @@ -17,7 +18,7 @@ workflow ANNOTATION_ENSEMBLVEP { main: ch_versions = Channel.empty() - ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + ENSEMBLVEP(vcf, vep_output, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) TABIX_BGZIPTABIX(ENSEMBLVEP.out.vcf) // Gather versions of all tools used @@ -26,6 +27,8 @@ workflow ANNOTATION_ENSEMBLVEP { emit: vcf_tbi = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] + json = ENSEMBLVEP.out.json + tab = ENSEMBLVEP.out.tab reports = ENSEMBLVEP.out.report // path: *.html versions = ch_versions // path: versions.yml } diff --git a/workflows/sarek.nf b/workflows/sarek.nf index dec5b15ca3..6fffb38fe7 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -867,6 +867,7 @@ workflow SAREK { params.tools, snpeff_db, snpeff_cache, + params.vep_output, vep_genome, vep_species, vep_cache_version, From 81e0a7865143628ff8b918f4d97b6a0d04d041de Mon Sep 17 00:00:00 2001 From: Lasse Folkersen Date: Fri, 10 Jun 2022 06:43:03 +0000 Subject: [PATCH 02/12] suggestion on how to get 
the output extension in the args instead --- modules/nf-core/modules/ensemblvep/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/modules/ensemblvep/main.nf b/modules/nf-core/modules/ensemblvep/main.nf index 9847182ec1..e1ee948237 100644 --- a/modules/nf-core/modules/ensemblvep/main.nf +++ b/modules/nf-core/modules/ensemblvep/main.nf @@ -29,7 +29,7 @@ process ENSEMBLVEP { script: def args = task.ext.args ?: '' def suffix = task.ext.suffix ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = args.prefix ? "$args.prefix" : "${meta.id}" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" """ mkdir $prefix From 93e90978b4901972855238c985660e8fab2819b9 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Fri, 10 Jun 2022 16:19:09 +0200 Subject: [PATCH 03/12] Setting vep-output-format via task.ext.vep_output --- conf/modules.config | 1 + modules/nf-core/modules/ensemblvep/main.nf | 4 ++-- subworkflows/local/annotate.nf | 3 +-- subworkflows/nf-core/annotation/ensemblvep/main.nf | 3 +-- workflows/sarek.nf | 1 - 5 files changed, 5 insertions(+), 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index e8c0f4ede2..cbd99a07a0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1029,6 +1029,7 @@ process{ (params.vep_spliceregion) ? '--plugin SpliceRegion' : '' ].join(' ').trim() ext.suffix = '_VEP' + ext.vep_output = params.vep_output if (!params.vep_cache) container = { params.vep_genome ? 
"nfcore/vep:104.3.${params.vep_genome}" : "nfcore/vep:104.3.${params.genome}" } publishDir = [ [ diff --git a/modules/nf-core/modules/ensemblvep/main.nf b/modules/nf-core/modules/ensemblvep/main.nf index e1ee948237..51cef6b991 100644 --- a/modules/nf-core/modules/ensemblvep/main.nf +++ b/modules/nf-core/modules/ensemblvep/main.nf @@ -9,7 +9,6 @@ process ENSEMBLVEP { input: tuple val(meta), path(vcf) - val vep_output val genome val species val cache_version @@ -28,8 +27,9 @@ process ENSEMBLVEP { script: def args = task.ext.args ?: '' + def vep_output = task.ext.vep_output ?: 'vcf' def suffix = task.ext.suffix ?: '' - def prefix = args.prefix ? "$args.prefix" : "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" """ mkdir $prefix diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf index fd2a9a7d5a..06cf473a5e 100644 --- a/subworkflows/local/annotate.nf +++ b/subworkflows/local/annotate.nf @@ -12,7 +12,6 @@ workflow ANNOTATE { tools snpeff_db snpeff_cache - vep_output vep_genome vep_species vep_cache_version @@ -42,7 +41,7 @@ workflow ANNOTATE { } if (tools.contains('vep')) { - ANNOTATION_ENSEMBLVEP(vcf, vep_output, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + ANNOTATION_ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) ch_reports = ch_reports.mix(ANNOTATION_ENSEMBLVEP.out.reports) ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_ENSEMBLVEP.out.vcf_tbi) diff --git a/subworkflows/nf-core/annotation/ensemblvep/main.nf b/subworkflows/nf-core/annotation/ensemblvep/main.nf index 8d1cc849b6..08ada62294 100644 --- a/subworkflows/nf-core/annotation/ensemblvep/main.nf +++ b/subworkflows/nf-core/annotation/ensemblvep/main.nf @@ -8,7 +8,6 @@ include { TABIX_BGZIPTABIX } from '../../../../modules/nf-core/modules/tabix/bgz workflow ANNOTATION_ENSEMBLVEP { take: vcf // channel: [ val(meta), vcf ] - vep_output vep_genome // value: genome to 
use vep_species // value: species to use vep_cache_version // value: cache version to use @@ -18,7 +17,7 @@ workflow ANNOTATION_ENSEMBLVEP { main: ch_versions = Channel.empty() - ENSEMBLVEP(vcf, vep_output, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) TABIX_BGZIPTABIX(ENSEMBLVEP.out.vcf) // Gather versions of all tools used diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 6fffb38fe7..dec5b15ca3 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -867,7 +867,6 @@ workflow SAREK { params.tools, snpeff_db, snpeff_cache, - params.vep_output, vep_genome, vep_species, vep_cache_version, From d5cc7887acfbdda245940eb090c1b2a185a1f845 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Mon, 13 Jun 2022 13:59:23 +0200 Subject: [PATCH 04/12] Including vep-output-format in args for ENSEMBLVEP-module --- conf/modules.config | 10 +++++++--- modules/nf-core/modules/ensemblvep/main.nf | 9 ++------- nextflow.config | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index cbd99a07a0..de039e3ff1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1026,10 +1026,9 @@ process{ (params.vep_dbnsfp && params.dbnsfp) ? '--plugin dbNSFP,dbNSFP.gz,rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF' : '', (params.vep_loftee) ? '--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-104.3/share/ensembl-vep-104.3-0' : '', (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? '--plugin SpliceAI,snv=spliceai_scores.raw.snv.hg38.vcf.gz,indel=spliceai_scores.raw.indel.hg38.vcf.gz' : '', - (params.vep_spliceregion) ? '--plugin SpliceRegion' : '' + (params.vep_spliceregion) ? '--plugin SpliceRegion' : '', + (params.vep_out_format) ? 
"--${params.vep_out_format}" : '--vcf' ].join(' ').trim() - ext.suffix = '_VEP' - ext.vep_output = params.vep_output if (!params.vep_cache) container = { params.vep_genome ? "nfcore/vep:104.3.${params.vep_genome}" : "nfcore/vep:104.3.${params.genome}" } publishDir = [ [ @@ -1045,6 +1044,11 @@ process{ ] } + withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_ENSEMBLVEP:ENSEMBLVEP' { + ext.prefix = {"${meta.id}_VEP.ann"} + ext.args2 = {"${params.vep_out_format}"} + } + withName: ".*:ANNOTATION_MERGE:ENSEMBLVEP" { ext.prefix = {"${meta.id}_snpEff"} } diff --git a/modules/nf-core/modules/ensemblvep/main.nf b/modules/nf-core/modules/ensemblvep/main.nf index 51cef6b991..3886f8a804 100644 --- a/modules/nf-core/modules/ensemblvep/main.nf +++ b/modules/nf-core/modules/ensemblvep/main.nf @@ -27,16 +27,13 @@ process ENSEMBLVEP { script: def args = task.ext.args ?: '' - def vep_output = task.ext.vep_output ?: 'vcf' - def suffix = task.ext.suffix ?: '' + def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? 
"\${PWD}/${cache}" : "/.vep" """ - mkdir $prefix - vep \\ -i $vcf \\ - -o ${prefix}${suffix}.ann.$vep_output \\ + -o ${prefix}.${args2} \\ $args \\ --assembly $genome \\ --species $species \\ @@ -45,9 +42,7 @@ process ENSEMBLVEP { --dir_cache $dir_cache \\ --fork $task.cpus \\ --stats_file ${prefix}.summary.html \\ - --$vep_output - rm -rf $prefix cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index 043a171083..47d0373f6b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -69,7 +69,7 @@ params { joint_germline = false // // Annotation - vep_output = 'vcf' + vep_out_format = 'vcf' vep_dbnsfp = null // dbnsfp plugin disabled within VEP dbnsfp = null // No dbnsfp processed file dbnsfp_tbi = null // No dbnsfp processed file index From 58a26f2c6159abf52efd147a739c16c9710858d4 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 14 Jun 2022 15:49:21 +0200 Subject: [PATCH 05/12] Fetching vep-output-file-extension from args. (No longer using args2.) --- conf/modules.config | 2 +- modules/nf-core/modules/ensemblvep/main.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index de039e3ff1..babe099e84 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1045,7 +1045,7 @@ process{ } withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_ENSEMBLVEP:ENSEMBLVEP' { - ext.prefix = {"${meta.id}_VEP.ann"} + ext.prefix = {"${meta.id}_VEP"} ext.args2 = {"${params.vep_out_format}"} } diff --git a/modules/nf-core/modules/ensemblvep/main.nf b/modules/nf-core/modules/ensemblvep/main.nf index 3886f8a804..debebbc049 100644 --- a/modules/nf-core/modules/ensemblvep/main.nf +++ b/modules/nf-core/modules/ensemblvep/main.nf @@ -27,13 +27,13 @@ process ENSEMBLVEP { script: def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' + def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 
'tab' : 'vcf' def prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" """ vep \\ -i $vcf \\ - -o ${prefix}.${args2} \\ + -o ${prefix}.ann.${file_extension} \\ $args \\ --assembly $genome \\ --species $species \\ From c7b85196dd020d2f3828c87dcf9e1ec87b970991 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 14 Jun 2022 16:44:22 +0200 Subject: [PATCH 06/12] Initializing empty channels for tab_ann and json_ann --- conf/modules.config | 1 - modules/nf-core/modules/ensemblvep/main.nf | 2 +- subworkflows/local/annotate.nf | 8 +++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index babe099e84..d8e9efd1ca 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1046,7 +1046,6 @@ process{ withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_ENSEMBLVEP:ENSEMBLVEP' { ext.prefix = {"${meta.id}_VEP"} - ext.args2 = {"${params.vep_out_format}"} } withName: ".*:ANNOTATION_MERGE:ENSEMBLVEP" { diff --git a/modules/nf-core/modules/ensemblvep/main.nf b/modules/nf-core/modules/ensemblvep/main.nf index debebbc049..ccc6838c38 100644 --- a/modules/nf-core/modules/ensemblvep/main.nf +++ b/modules/nf-core/modules/ensemblvep/main.nf @@ -27,7 +27,7 @@ process ENSEMBLVEP { script: def args = task.ext.args ?: '' - def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 'tab' : 'vcf' + def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json") ? 'json' : args.contains("--tab") ? 'tab' : 'vcf' def prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? 
"\${PWD}/${cache}" : "/.vep" """ diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf index 06cf473a5e..dde37b7a3b 100644 --- a/subworkflows/local/annotate.nf +++ b/subworkflows/local/annotate.nf @@ -19,9 +19,11 @@ workflow ANNOTATE { vep_extra_files main: - ch_reports = Channel.empty() - ch_vcf_ann = Channel.empty() - ch_versions = Channel.empty() + ch_reports = Channel.empty() + ch_vcf_ann = Channel.empty() + ch_tab_ann = Channel.empty() + ch_json_ann = Channel.empty() + ch_versions = Channel.empty() if (tools.contains('merge') || tools.contains('snpeff')) { ANNOTATION_SNPEFF(vcf, snpeff_db, snpeff_cache) From ec7df344bb5c7803fdb7fa7fe573d1c3ab73bfbe Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Tue, 14 Jun 2022 17:10:00 +0200 Subject: [PATCH 07/12] Adding new CLI-option vep_out_format to nextflow_schema.json --- nextflow_schema.json | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index e5e029de25..47cd44572d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,11 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["step", "input", "outdir"], + "required": [ + "step", + "input", + "outdir" + ], "properties": { "step": { "type": "string", @@ -199,7 +203,11 @@ "type": "string", "default": "bwa-mem", "fa_icon": "fas fa-puzzle-piece", - "enum": ["bwa-mem", "bwa-mem2", "dragmap"], + "enum": [ + "bwa-mem", + "bwa-mem2", + "dragmap" + ], "description": "Specify aligner to be used to map reads to reference genome.", "help_text": "> **WARNING** Current indices for `bwa` in AWS iGenomes are not compatible with `bwa-mem2` and `dragmap`.\n> `Sarek` will build them automatically if not provided.\n\n> **WARNING** BWA-mem2 is in active development\n> Sarek might not be able to require the right amount of resources for it at 
the moment\n> We recommend to use pre-built indexes.", "hidden": true @@ -557,6 +565,18 @@ "description": "VEP cache version.", "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, + "vep_out_format": { + "type": "string", + "default": "vcf", + "description": "VEP output-file format.", + "enum": [ + "json", + "tab", + "vcf" + ], + "help_text": "Sets the format of the output-file from VEP. Available formats: json, tab and vcf.", + "fa_icon": "fas fa-table" + }, "save_reference": { "type": "boolean", "fa_icon": "fas fa-download", @@ -694,7 +714,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email": { From 383d3d12f1668fb7d0150f168aafec2f547c4302 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Wed, 15 Jun 2022 15:54:43 +0200 Subject: [PATCH 08/12] New version of VEP-module from nf-core/modules --- modules.json | 2 +- modules/nf-core/modules/ensemblvep/Dockerfile | 4 ++-- modules/nf-core/modules/ensemblvep/build.sh | 12 ++++++------ .../modules/ensemblvep/environment.yml | 4 ++-- modules/nf-core/modules/ensemblvep/main.nf | 6 +++++- modules/nf-core/modules/ensemblvep/meta.yml | 19 +++++++++++++++++-- 6 files changed, 33 insertions(+), 14 deletions(-) diff --git a/modules.json b/modules.json index fab922331e..46418f7c0f 100644 --- a/modules.json +++ b/modules.json @@ -70,7 +70,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "ensemblvep": { - "git_sha": 
"40dd662fd26c3eb3160b7c8cbbe9bff80bbe2c30" + "git_sha": "30f72e24822576c6f90a0bf9db678b403c70eccf" }, "fastqc": { "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" diff --git a/modules/nf-core/modules/ensemblvep/Dockerfile b/modules/nf-core/modules/ensemblvep/Dockerfile index b4a1c66471..4ada7c6bbb 100644 --- a/modules/nf-core/modules/ensemblvep/Dockerfile +++ b/modules/nf-core/modules/ensemblvep/Dockerfile @@ -11,8 +11,8 @@ RUN conda env create -f /environment.yml && conda clean -a # Setup default ARG variables ARG GENOME=GRCh38 ARG SPECIES=homo_sapiens -ARG VEP_VERSION=104 -ARG VEP_TAG=104.3 +ARG VEP_VERSION=105 +ARG VEP_TAG=105.0 # Add conda installation dir to PATH (instead of doing 'conda activate') ENV PATH /opt/conda/envs/nf-core-vep-${VEP_TAG}/bin:$PATH diff --git a/modules/nf-core/modules/ensemblvep/build.sh b/modules/nf-core/modules/ensemblvep/build.sh index 650c8704e5..6f340c0f3d 100644 --- a/modules/nf-core/modules/ensemblvep/build.sh +++ b/modules/nf-core/modules/ensemblvep/build.sh @@ -20,9 +20,9 @@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -build_push "GRCh37" "homo_sapiens" "104" "104.3" -build_push "GRCh38" "homo_sapiens" "104" "104.3" -build_push "GRCm38" "mus_musculus" "102" "104.3" -build_push "GRCm39" "mus_musculus" "104" "104.3" -build_push "CanFam3.1" "canis_lupus_familiaris" "104" "104.3" -build_push "WBcel235" "caenorhabditis_elegans" "104" "104.3" +build_push "GRCh37" "homo_sapiens" "105" "105.0" +build_push "GRCh38" "homo_sapiens" "105" "105.0" +build_push "GRCm38" "mus_musculus" "102" "105.0" +build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "CanFam3.1" "canis_lupus_familiaris" "104" "105.0" +build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" diff --git a/modules/nf-core/modules/ensemblvep/environment.yml b/modules/nf-core/modules/ensemblvep/environment.yml index c0731c26d8..5df85b805d 100644 --- a/modules/nf-core/modules/ensemblvep/environment.yml +++ 
b/modules/nf-core/modules/ensemblvep/environment.yml @@ -1,10 +1,10 @@ # You can use this file to create a conda environment for this module: # conda env create -f environment.yml -name: nf-core-vep-104.3 +name: nf-core-vep-105.0 channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::ensembl-vep=104.3 + - bioconda::ensembl-vep=105.0 diff --git a/modules/nf-core/modules/ensemblvep/main.nf b/modules/nf-core/modules/ensemblvep/main.nf index ccc6838c38..391a182def 100644 --- a/modules/nf-core/modules/ensemblvep/main.nf +++ b/modules/nf-core/modules/ensemblvep/main.nf @@ -13,6 +13,7 @@ process ENSEMBLVEP { val species val cache_version path cache + path fasta path extra_files output: @@ -27,14 +28,17 @@ process ENSEMBLVEP { script: def args = task.ext.args ?: '' - def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json") ? 'json' : args.contains("--tab") ? 'tab' : 'vcf' + def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 'tab' : 'vcf' def prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" + def reference = fasta ? "--fasta $fasta" : "" + """ vep \\ -i $vcf \\ -o ${prefix}.ann.${file_extension} \\ $args \\ + $reference \\ --assembly $genome \\ --species $species \\ --cache \\ diff --git a/modules/nf-core/modules/ensemblvep/meta.yml b/modules/nf-core/modules/ensemblvep/meta.yml index 418bb970d9..a4dde8a6fd 100644 --- a/modules/nf-core/modules/ensemblvep/meta.yml +++ b/modules/nf-core/modules/ensemblvep/meta.yml @@ -1,5 +1,5 @@ name: ENSEMBLVEP -description: Ensembl Variant Effect Predictor (VEP) +description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`. 
keywords: - annotation tools: @@ -36,6 +36,11 @@ input: type: file description: | path to VEP cache (optional) + - fasta: + type: file + description: | + reference FASTA file (optional) + pattern: "*.{fasta,fa}" - extra_files: type: tuple description: | @@ -44,8 +49,18 @@ output: - vcf: type: file description: | - annotated vcf + annotated vcf (optional) pattern: "*.ann.vcf" + - tab: + type: file + description: | + tab file with annotated variants (optional) + pattern: "*.ann.tab" + - json: + type: file + description: | + json file with annotated variants (optional) + pattern: "*.ann.json" - report: type: file description: VEP report file From b776282a148e8bcc4c10e76ab8764df407b804f9 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Wed, 15 Jun 2022 16:08:10 +0200 Subject: [PATCH 09/12] Prettier nextflow_schema.json --- nextflow_schema.json | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 47cd44572d..2dc41543c3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,11 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "step", - "input", - "outdir" - ], + "required": ["step", "input", "outdir"], "properties": { "step": { "type": "string", @@ -203,11 +199,7 @@ "type": "string", "default": "bwa-mem", "fa_icon": "fas fa-puzzle-piece", - "enum": [ - "bwa-mem", - "bwa-mem2", - "dragmap" - ], + "enum": ["bwa-mem", "bwa-mem2", "dragmap"], "description": "Specify aligner to be used to map reads to reference genome.", "help_text": "> **WARNING** Current indices for `bwa` in AWS iGenomes are not compatible with `bwa-mem2` and `dragmap`.\n> `Sarek` will build them automatically if not provided.\n\n> **WARNING** BWA-mem2 is in active development\n> Sarek might not be able to require the right amount of resources for it at the moment\n> We recommend to use pre-built 
indexes.", "hidden": true @@ -569,11 +561,7 @@ "type": "string", "default": "vcf", "description": "VEP output-file format.", - "enum": [ - "json", - "tab", - "vcf" - ], + "enum": ["json", "tab", "vcf"], "help_text": "Sets the format of the output-file from VEP. Available formats: json, tab and vcf.", "fa_icon": "fas fa-table" }, @@ -714,14 +702,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email": { From e582f1923c2dbcc258ae051c6fa07a61dabc1282 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Thu, 16 Jun 2022 12:14:11 +0200 Subject: [PATCH 10/12] Parsing fasta down to VEP-module. Trying to fix snpEff/VEP-related CI-tests. 
--- conf/modules.config | 2 +- subworkflows/local/annotate.nf | 5 +++-- subworkflows/nf-core/annotation/ensemblvep/main.nf | 3 ++- workflows/sarek.nf | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index d8e9efd1ca..b8353337ab 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1049,7 +1049,7 @@ process{ } withName: ".*:ANNOTATION_MERGE:ENSEMBLVEP" { - ext.prefix = {"${meta.id}_snpEff"} + ext.prefix = {"${meta.id}_snpEff_VEP"} } withName: 'SNPEFF' { diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf index dde37b7a3b..a0ee0e9ffa 100644 --- a/subworkflows/local/annotate.nf +++ b/subworkflows/local/annotate.nf @@ -9,6 +9,7 @@ include { ANNOTATION_ENSEMBLVEP } from '../nf-core/annotatio workflow ANNOTATE { take: vcf // channel: [ val(meta), vcf ] + fasta tools snpeff_db snpeff_cache @@ -35,7 +36,7 @@ workflow ANNOTATE { if (tools.contains('merge')) { vcf_ann_for_merge = ANNOTATION_SNPEFF.out.vcf_tbi.map{ meta, vcf, tbi -> [meta, vcf] } - ANNOTATION_MERGE(vcf_ann_for_merge, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + ANNOTATION_MERGE(vcf_ann_for_merge, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) ch_reports = ch_reports.mix(ANNOTATION_MERGE.out.reports) ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_MERGE.out.vcf_tbi) @@ -43,7 +44,7 @@ workflow ANNOTATE { } if (tools.contains('vep')) { - ANNOTATION_ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + ANNOTATION_ENSEMBLVEP(vcf, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) ch_reports = ch_reports.mix(ANNOTATION_ENSEMBLVEP.out.reports) ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_ENSEMBLVEP.out.vcf_tbi) diff --git a/subworkflows/nf-core/annotation/ensemblvep/main.nf b/subworkflows/nf-core/annotation/ensemblvep/main.nf index 08ada62294..f62553e39e 100644 --- 
a/subworkflows/nf-core/annotation/ensemblvep/main.nf +++ b/subworkflows/nf-core/annotation/ensemblvep/main.nf @@ -8,6 +8,7 @@ include { TABIX_BGZIPTABIX } from '../../../../modules/nf-core/modules/tabix/bgz workflow ANNOTATION_ENSEMBLVEP { take: vcf // channel: [ val(meta), vcf ] + fasta // value: fasta to use vep_genome // value: genome to use vep_species // value: species to use vep_cache_version // value: cache version to use @@ -17,7 +18,7 @@ workflow ANNOTATION_ENSEMBLVEP { main: ch_versions = Channel.empty() - ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, fasta, vep_extra_files) TABIX_BGZIPTABIX(ENSEMBLVEP.out.vcf) // Gather versions of all tools used diff --git a/workflows/sarek.nf b/workflows/sarek.nf index dec5b15ca3..6d5e10944c 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -864,6 +864,7 @@ workflow SAREK { if (params.tools.contains('merge') || params.tools.contains('snpeff') || params.tools.contains('vep')) { ANNOTATE(vcf_to_annotate, + params.fasta, params.tools, snpeff_db, snpeff_cache, From 6f2fc9d413325db3622171186cb77420943ae194 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen <37172585+asp8200@users.noreply.github.com> Date: Thu, 16 Jun 2022 12:39:59 +0200 Subject: [PATCH 11/12] Update workflows/sarek.nf Co-authored-by: Maxime U. 
Garcia --- workflows/sarek.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 6d5e10944c..0290af33bb 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -864,7 +864,7 @@ workflow SAREK { if (params.tools.contains('merge') || params.tools.contains('snpeff') || params.tools.contains('vep')) { ANNOTATE(vcf_to_annotate, - params.fasta, + fasta, params.tools, snpeff_db, snpeff_cache, From bc36841c9e15e90ee9a3a31a06946fc81f64d3d0 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Thu, 16 Jun 2022 15:47:27 +0200 Subject: [PATCH 12/12] Updating CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47021b9e00..97b2d23be6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#540](https://github.com/nf-core/sarek/pull/540) - Add modules and subworkflows for `cnvkit` somatic mode - [#557](https://github.com/nf-core/sarek/pull/557) - Add `Haplotypecaller` single sample mode together with `CNNScoreVariants` and `FilterVariantTranches` - [#576](https://github.com/nf-core/sarek/pull/576) - Add modules and subworkflows for `cnvkit` germline mode +- [#582](https://github.com/nf-core/sarek/pull/582) - Added option `--vep_out_format` for setting the format of the output-file from VEP to `json`, `tab` or `vcf` (default) ### Changed