diff --git a/CHANGELOG.md b/CHANGELOG.md index 47021b9e00..97b2d23be6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#540](https://github.com/nf-core/sarek/pull/540) - Add modules and subworkflows for `cnvkit` somatic mode - [#557](https://github.com/nf-core/sarek/pull/557) - Add `Haplotypecaller` single sample mode together with `CNNScoreVariants` and `FilterVariantTranches` - [#576](https://github.com/nf-core/sarek/pull/576) - Add modules and subworkflows for `cnvkit` germline mode +- [#582](https://github.com/nf-core/sarek/pull/582) - Add option `--vep_out_format` for setting the format of the output-file from VEP to `json`, `tab` or `vcf` (default) ### Changed diff --git a/conf/modules.config b/conf/modules.config index f276b567a9..e37e1ec047 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1044,18 +1044,30 @@ process{ (params.vep_dbnsfp && params.dbnsfp) ? '--plugin dbNSFP,dbNSFP.gz,rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF' : '', (params.vep_loftee) ? '--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-104.3/share/ensembl-vep-104.3-0' : '', (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? '--plugin SpliceAI,snv=spliceai_scores.raw.snv.hg38.vcf.gz,indel=spliceai_scores.raw.indel.hg38.vcf.gz' : '', - (params.vep_spliceregion) ? '--plugin SpliceRegion' : '' + (params.vep_spliceregion) ? '--plugin SpliceRegion' : '', + (params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf' ].join(' ').trim() if (!params.vep_cache) container = { params.vep_genome ? 
"nfcore/vep:104.3.${params.vep_genome}" : "nfcore/vep:104.3.${params.genome}" } publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/EnsemblVEP/${meta.id}/${meta.variantcaller}" }, - pattern: "*html" + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/EnsemblVEP/${meta.id}/${meta.variantcaller}" }, + pattern: "*html" + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.id}/${meta.variantcaller}" }, + pattern: "*{json,tab}" + ] ] } + withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_ENSEMBLVEP:ENSEMBLVEP' { + ext.prefix = {"${meta.id}_VEP"} + } + withName: ".*:ANNOTATION_MERGE:ENSEMBLVEP" { - // Output file will have format *_snpEff_VEP.ann.vcf + // Output file will have format *_snpEff_VEP.ann.vcf, *_snpEff_VEP.ann.json or *_snpEff_VEP.ann.tab ext.prefix = { "${vcf.baseName.minus(".ann.vcf")}_VEP" } } diff --git a/modules.json b/modules.json index e323da9ddb..b1b84ef1d9 100644 --- a/modules.json +++ b/modules.json @@ -67,7 +67,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "ensemblvep": { - "git_sha": "40dd662fd26c3eb3160b7c8cbbe9bff80bbe2c30" + "git_sha": "30f72e24822576c6f90a0bf9db678b403c70eccf" }, "fastqc": { "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" diff --git a/modules/nf-core/modules/ensemblvep/Dockerfile b/modules/nf-core/modules/ensemblvep/Dockerfile index b4a1c66471..4ada7c6bbb 100644 --- a/modules/nf-core/modules/ensemblvep/Dockerfile +++ b/modules/nf-core/modules/ensemblvep/Dockerfile @@ -11,8 +11,8 @@ RUN conda env create -f /environment.yml && conda clean -a # Setup default ARG variables ARG GENOME=GRCh38 ARG SPECIES=homo_sapiens -ARG VEP_VERSION=104 -ARG VEP_TAG=104.3 +ARG VEP_VERSION=105 +ARG VEP_TAG=105.0 # Add conda installation dir to PATH (instead of doing 'conda activate') ENV PATH /opt/conda/envs/nf-core-vep-${VEP_TAG}/bin:$PATH diff --git a/modules/nf-core/modules/ensemblvep/build.sh b/modules/nf-core/modules/ensemblvep/build.sh 
index 650c8704e5..6f340c0f3d 100644 --- a/modules/nf-core/modules/ensemblvep/build.sh +++ b/modules/nf-core/modules/ensemblvep/build.sh @@ -20,9 +20,9 @@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -build_push "GRCh37" "homo_sapiens" "104" "104.3" -build_push "GRCh38" "homo_sapiens" "104" "104.3" -build_push "GRCm38" "mus_musculus" "102" "104.3" -build_push "GRCm39" "mus_musculus" "104" "104.3" -build_push "CanFam3.1" "canis_lupus_familiaris" "104" "104.3" -build_push "WBcel235" "caenorhabditis_elegans" "104" "104.3" +build_push "GRCh37" "homo_sapiens" "105" "105.0" +build_push "GRCh38" "homo_sapiens" "105" "105.0" +build_push "GRCm38" "mus_musculus" "102" "105.0" +build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "CanFam3.1" "canis_lupus_familiaris" "104" "105.0" +build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" diff --git a/modules/nf-core/modules/ensemblvep/environment.yml b/modules/nf-core/modules/ensemblvep/environment.yml index c0731c26d8..5df85b805d 100644 --- a/modules/nf-core/modules/ensemblvep/environment.yml +++ b/modules/nf-core/modules/ensemblvep/environment.yml @@ -1,10 +1,10 @@ # You can use this file to create a conda environment for this module: # conda env create -f environment.yml -name: nf-core-vep-104.3 +name: nf-core-vep-105.0 channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::ensembl-vep=104.3 + - bioconda::ensembl-vep=105.0 diff --git a/modules/nf-core/modules/ensemblvep/main.nf b/modules/nf-core/modules/ensemblvep/main.nf index a5a9b1abcc..391a182def 100644 --- a/modules/nf-core/modules/ensemblvep/main.nf +++ b/modules/nf-core/modules/ensemblvep/main.nf @@ -13,37 +13,40 @@ process ENSEMBLVEP { val species val cache_version path cache + path fasta path extra_files output: - tuple val(meta), path("*.ann.vcf"), emit: vcf - path "*.summary.html" , emit: report - path "versions.yml" , emit: versions + tuple val(meta), path("*.ann.vcf") , optional:true, emit: vcf + tuple val(meta), 
path("*.ann.tab") , optional:true, emit: tab + tuple val(meta), path("*.ann.json") , optional:true, emit: json + path "*.summary.html" , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 'tab' : 'vcf' def prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" - """ - mkdir $prefix + def reference = fasta ? "--fasta $fasta" : "" + """ vep \\ -i $vcf \\ - -o ${prefix}.ann.vcf \\ + -o ${prefix}.ann.${file_extension} \\ $args \\ + $reference \\ --assembly $genome \\ --species $species \\ --cache \\ --cache_version $cache_version \\ --dir_cache $dir_cache \\ --fork $task.cpus \\ - --vcf \\ - --stats_file ${prefix}.summary.html + --stats_file ${prefix}.summary.html \\ - rm -rf $prefix cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/ensemblvep/meta.yml b/modules/nf-core/modules/ensemblvep/meta.yml index 418bb970d9..a4dde8a6fd 100644 --- a/modules/nf-core/modules/ensemblvep/meta.yml +++ b/modules/nf-core/modules/ensemblvep/meta.yml @@ -1,5 +1,5 @@ name: ENSEMBLVEP -description: Ensembl Variant Effect Predictor (VEP) +description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`. 
keywords: - annotation tools: @@ -36,6 +36,11 @@ input: type: file description: | path to VEP cache (optional) + - fasta: + type: file + description: | + reference FASTA file (optional) + pattern: "*.{fasta,fa}" - extra_files: type: tuple description: | @@ -44,8 +49,18 @@ output: - vcf: type: file description: | - annotated vcf + annotated vcf (optional) pattern: "*.ann.vcf" + - tab: + type: file + description: | + tab file with annotated variants (optional) + pattern: "*.ann.tab" + - json: + type: file + description: | + json file with annotated variants (optional) + pattern: "*.ann.json" - report: type: file description: VEP report file diff --git a/nextflow.config b/nextflow.config index 8e3d38abdd..66e4ee79a4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -67,6 +67,7 @@ params { // Annotation + vep_out_format = 'vcf' vep_dbnsfp = null // dbnsfp plugin disabled within VEP dbnsfp = null // No dbnsfp processed file dbnsfp_tbi = null // No dbnsfp processed file index diff --git a/nextflow_schema.json b/nextflow_schema.json index a352bdc710..5c9020e4de 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -538,6 +538,14 @@ "description": "VEP cache version.", "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, + "vep_out_format": { + "type": "string", + "default": "vcf", + "description": "VEP output-file format.", + "enum": ["json", "tab", "vcf"], + "help_text": "Sets the format of the output-file from VEP. 
Available formats: json, tab and vcf.", + "fa_icon": "fas fa-table" + }, "save_reference": { "type": "boolean", "fa_icon": "fas fa-download", diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf index 50a1476e47..52532ac835 100644 --- a/subworkflows/local/annotate.nf +++ b/subworkflows/local/annotate.nf @@ -8,20 +8,23 @@ include { ANNOTATION_ENSEMBLVEP } from '../nf-core/annotatio workflow ANNOTATE { take: - vcf // channel: [ val(meta), vcf ] - tools // Mandatory, list of tools to apply - snpeff_db - snpeff_cache - vep_genome - vep_species - vep_cache_version - vep_cache - vep_extra_files - + vcf // channel: [ val(meta), vcf ] + fasta // Reference fasta passed on to the VEP subworkflows + tools // Mandatory, list of tools to apply + snpeff_db + snpeff_cache + vep_genome + vep_species + vep_cache_version + vep_cache + vep_extra_files + main: - ch_reports = Channel.empty() - ch_vcf_ann = Channel.empty() - ch_versions = Channel.empty() + ch_reports = Channel.empty() + ch_vcf_ann = Channel.empty() + ch_tab_ann = Channel.empty() + ch_json_ann = Channel.empty() + ch_versions = Channel.empty() if (tools.contains('merge') || tools.contains('snpeff')) { ANNOTATION_SNPEFF(vcf, snpeff_db, snpeff_cache) @@ -33,7 +36,7 @@ workflow ANNOTATE { if (tools.contains('merge')) { vcf_ann_for_merge = ANNOTATION_SNPEFF.out.vcf_tbi.map{ meta, vcf, tbi -> [meta, vcf] } - ANNOTATION_MERGE(vcf_ann_for_merge, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + ANNOTATION_MERGE(vcf_ann_for_merge, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) ch_reports = ch_reports.mix(ANNOTATION_MERGE.out.reports) ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_MERGE.out.vcf_tbi) @@ -41,15 +44,19 @@ workflow ANNOTATE { } if (tools.contains('vep')) { - ANNOTATION_ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + ANNOTATION_ENSEMBLVEP(vcf, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) - 
ch_reports = 
ch_reports.mix(ANNOTATION_ENSEMBLVEP.out.reports) - ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_ENSEMBLVEP.out.vcf_tbi) - ch_versions = ch_versions.mix(ANNOTATION_ENSEMBLVEP.out.versions.first()) + ch_reports = ch_reports.mix(ANNOTATION_ENSEMBLVEP.out.reports) + ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_ENSEMBLVEP.out.vcf_tbi) + ch_tab_ann = ch_tab_ann.mix(ANNOTATION_ENSEMBLVEP.out.tab) + ch_json_ann = ch_json_ann.mix(ANNOTATION_ENSEMBLVEP.out.json) + ch_versions = ch_versions.mix(ANNOTATION_ENSEMBLVEP.out.versions.first()) } emit: - vcf_ann = ch_vcf_ann // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] - reports = ch_reports // path: *.html - versions = ch_versions // path: versions.yml + vcf_ann = ch_vcf_ann // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] + tab_ann = ch_tab_ann // channel: [ val(meta), tab ] + json_ann = ch_json_ann // channel: [ val(meta), json ] + reports = ch_reports // path: *.html + versions = ch_versions // path: versions.yml } diff --git a/subworkflows/nf-core/annotation/ensemblvep/main.nf b/subworkflows/nf-core/annotation/ensemblvep/main.nf index 4c7d0e3e69..f62553e39e 100644 --- a/subworkflows/nf-core/annotation/ensemblvep/main.nf +++ b/subworkflows/nf-core/annotation/ensemblvep/main.nf @@ -8,6 +8,7 @@ include { TABIX_BGZIPTABIX } from '../../../../modules/nf-core/modules/tabix/bgz workflow ANNOTATION_ENSEMBLVEP { take: vcf // channel: [ val(meta), vcf ] + fasta // value: fasta to use vep_genome // value: genome to use vep_species // value: species to use vep_cache_version // value: cache version to use @@ -17,7 +18,7 @@ workflow ANNOTATION_ENSEMBLVEP { main: ch_versions = Channel.empty() - ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, fasta, vep_extra_files) TABIX_BGZIPTABIX(ENSEMBLVEP.out.vcf) // Gather versions of all tools used @@ -26,6 +27,8 @@ workflow ANNOTATION_ENSEMBLVEP { emit: vcf_tbi = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ 
val(meta), vcf.gz, vcf.gz.tbi ] + json = ENSEMBLVEP.out.json // channel: [ val(meta), json ] + 
tab = ENSEMBLVEP.out.tab // channel: [ val(meta), tab ] reports = ENSEMBLVEP.out.report // path: *.html versions = ch_versions // path: versions.yml } diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 07acfa7d03..11f5f5ab1b 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -848,6 +848,7 @@ workflow SAREK { if (params.tools.contains('merge') || params.tools.contains('snpeff') || params.tools.contains('vep')) { ANNOTATE(vcf_to_annotate, + fasta, params.tools, snpeff_db, snpeff_cache,