diff --git a/CHANGELOG.md b/CHANGELOG.md index 4830720024..4826d3015e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#511](https://github.com/nf-core/sarek/pull/511) - Sync `TEMPLATE` with `tools` `2.3.2` - [#520](https://github.com/nf-core/sarek/pull/520) - Improve annotation subworkflows - [#537](https://github.com/nf-core/sarek/pull/537) - Update workflow figure +- [#539](https://github.com/nf-core/sarek/pull/539) - Update `CITATIONS.md` ### Fixed @@ -78,6 +79,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#485](https://github.com/nf-core/sarek/pull/485) - `--skip_qc`, `--skip_markduplicates` and `--skip_bqsr` is now `--skip_tools` - [#538](https://github.com/nf-core/sarek/pull/538) - `--sequencing_center` is now `--seq_center` - [#538](https://github.com/nf-core/sarek/pull/538) - `--markdup_java_options` has been removed +- [#539](https://github.com/nf-core/sarek/pull/539) - `--annotate_tools` has been removed +- [#539](https://github.com/nf-core/sarek/pull/539) - `--cadd_cache`, `--cadd_indels`, `--cadd_indels_tbi`, `--cadd_wg_snvs`, `--cadd_wg_snvs_tbi` have been removed +- [#539](https://github.com/nf-core/sarek/pull/539) - `--genesplicer` has been removed +- [#539](https://github.com/nf-core/sarek/pull/539) - `conf/genomes.config` and `params.genomes_base` have been removed ## [2.7.1](https://github.com/nf-core/sarek/releases/tag/2.7.1) - Pårtejekna diff --git a/CITATIONS.md b/CITATIONS.md index 5cedcdd425..401e0057da 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -71,8 +71,23 @@ > Danecek P, Auton A, Abecasis G, et al.: The variant call format and VCFtools. Bioinformatics. 2011 Aug 1;27(15):2156-8. doi: 10.1093/bioinformatics/btr330. Epub 2011 Jun 7. PubMed PMID: 21653522; PubMed Central PMCID: PMC3137218. 
- [VEP](https://pubmed.ncbi.nlm.nih.gov/27268795/) + > McLaren W, Gil L, Hunt SE, et al.: The Ensembl Variant Effect Predictor. Genome Biol. 2016 Jun 6;17(1):122. doi: 10.1186/s13059-016-0974-4. PubMed PMID: 27268795; PubMed Central PMCID: PMC4893825. +- [dbNSFP](https://pubmed.ncbi.nlm.nih.gov/33261662/) + + > Liu X, et al.: dbNSFP v4: a comprehensive database of transcript-specific functional predictions and annotations for human nonsynonymous and splice-site SNVs. Genome Med. 2020 Dec 2;12(1):103. doi: 10.1186/s13073-020-00803-9. PubMed PMID: 33261662; PubMed Central PMCID: PMC7709417. + +- [LOFTEE](https://pubmed.ncbi.nlm.nih.gov/32461654/) + + > Karczewski KJ, et al.: The mutational constraint spectrum quantified from variation in 141,456 humans. Nature. 2020 May;581(7809):434-443. doi: 10.1038/s41586-020-2308-7. PubMed PMID: 32461654; PubMed Central PMCID: PMC7334197. + +- [SpliceAI](https://pubmed.ncbi.nlm.nih.gov/30661751/) + + > Jaganathan K, et al.: Predicting Splicing from Primary Sequence with Deep Learning. Cell. 2019 Jan 24;176(3):535-548.e24. doi: 10.1016/j.cell.2018.12.015. PubMed PMID: 30661751. + +- [SpliceRegion](https://github.com/Ensembl/VEP_plugins/blob/release/106/SpliceRegion.pm) + ## R packages - [R](https://www.R-project.org/) @@ -88,6 +103,7 @@ > Trevor L Davis (2018). optparse: Command Line Option Parser. - [RColorBrewer](https://CRAN.R-project.org/package=RColorBrewer) + > Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes. ## Software packaging/containerisation tools @@ -107,4 +123,5 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/assets/email_template.html b/assets/email_template.html index 5d4bf62f6e..130be84652 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -4,8 +4,10 @@ - - + nf-core/sarek Pipeline Report diff --git a/conf/genomes.config b/conf/genomes.config deleted file mode 100644 index 7317f16f0f..0000000000 --- a/conf/genomes.config +++ /dev/null @@ -1,36 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for reference genome - * ------------------------------------------------- - * Defines reference genomes, without using iGenome paths - * Can be used by any config that customises the base - * path using $params.genomes_base / --genomes_base - * - * CAREFUL: Some o the files might be reuiqred in the CI tests not yet implemented. They should be gradually moved to the test.config. Until then lets keep this file. - */ - -params { - genomes { - 'minimalGRCh37' { - fasta = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta" - } - 'smallGRCh37' { - dbsnp = "${params.genomes_base}/dbsnp_138.b37.small.vcf.gz" - fasta = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta" - germline_resource = "${params.genomes_base}/gnomAD.r2.1.1.GRCh37.small.PASS.AC.AF.only.vcf.gz" - intervals = "${params.genomes_base}/small.intervals" - known_indels = "${params.genomes_base}/Mills_1000G_gold_standard_and_1000G_phase1.indels.b37.small.vcf.gz" - snpeff_db = 'GRCh37.75' - vep_genome = 'GRCh37' - vep_species = 'homo_sapiens' - vep_cache_version = '104' - } - 'smallerGRCh37' { - fasta = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta" - known_indels = "${params.genomes_base}/dbsnp_138.b37.small.vcf.gz" - } - 'custom' { - fasta = null - } - } -} diff --git a/conf/igenomes.config b/conf/igenomes.config index df2ed7cd09..fbd9881bba 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -28,7 +28,8 @@ params { known_indels_tbi = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx" mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/Control-FREEC/out100m2_hg19.gem" snpeff_db = 'GRCh37.75' - vep_cache_version = '104' + snpeff_genome = 'GRCh37' + vep_cache_version = 104 vep_genome = 'GRCh37' vep_species = 'homo_sapiens' } @@ -51,7 +52,8 @@ params { known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Control-FREEC/out100m2_hg38.gem" snpeff_db = 'GRCh38.99' - vep_cache_version = '104' + snpeff_genome = 'GRCh38' + vep_cache_version = 104 vep_genome = 'GRCh38' vep_species = 'homo_sapiens' } @@ -78,7 +80,8 @@ params { mappability = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Control-FREEC/GRCm38_68_mm10.gem" readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" snpeff_db = 'GRCm38.99' - vep_cache_version = '102' + snpeff_genome = 'GRCm38' + vep_cache_version = 102 vep_genome = 'GRCm38' vep_species = 'mus_musculus' } @@ -101,7 +104,8 @@ params { bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" snpeff_db = 'WBcel235.99' - vep_cache_version = '104' + snpeff_genome = 'WBcel235' + vep_cache_version = 104 vep_genome = 'WBcel235' vep_species = 'caenorhabditis_elegans' } diff --git a/conf/modules.config b/conf/modules.config index fedaf85b45..ddc3cad4d2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -963,14 +963,18 @@ process{ withName: 'VCFTOOLS_SUMMARY'{ ext.args = "--FILTER-summary" } -} // ANNOTATE -process { withName: 'ENSEMBLVEP' { - ext.args = '--everything 
--filter_common --per_gene --total_length --offline' - container = { "nfcore/vep:104.3.${params.genome}" } + ext.args = [ + '--everything --filter_common --per_gene --total_length --offline', + (params.vep_dbnsfp && params.dbnsfp) ? '--plugin dbNSFP,dbNSFP.gz,rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF' : '', + (params.vep_loftee) ? '--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-104.3/share/ensembl-vep-104.3-0' : '', + (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? '--plugin SpliceAI,snv=spliceai_scores.raw.snv.hg38.vcf.gz,indel=spliceai_scores.raw.indel.hg38.vcf.gz' : '', + (params.vep_spliceregion) ? '--plugin SpliceRegion' : '' + ].join(' ').trim() + if (!params.vep_cache) container = { params.vep_genome ? "nfcore/vep:104.3.${params.vep_genome}" : "nfcore/vep:104.3.${params.genome}" } publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/reports/EnsemblVEP/${meta.id}/${meta.variantcaller}" }, @@ -978,9 +982,13 @@ process { ] } + withName: ".*:ANNOTATION_MERGE:ENSEMBLVEP" { + ext.prefix = {"${meta.id}_snpEff"} + } + withName: 'SNPEFF' { ext.args = '-nodownload -canon -v' - container = { "nfcore/snpeff:5.0.${params.genome}" } + if (!params.snpeff_cache) container = { params.snpeff_genome ? 
"nfcore/snpeff:5.0.${params.snpeff_genome}" : "nfcore/snpeff:5.0.${params.genome}" } publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/reports/SnpEff/${meta.id}/${meta.variantcaller}" }, @@ -989,56 +997,36 @@ process { ] } - withName: 'ANNOTATION_BGZIPTABIX' { + withName: "NFCORE_SAREK:SAREK:ANNOTATE:.*:TABIX_BGZIPTABIX" { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/annotation/${meta.id}/${meta.variantcaller}" }, pattern: "*{gz,gz.tbi}" ] } -} -if (params.tools && (params.tools.contains('snpeff') || params.tools.contains('merge'))) { - process { - withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_SNPEFF:ANNOTATION_BGZIPTABIX' { - ext.prefix = {"${meta.id}_snpEff.ann.vcf"} - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/annotation/${meta.id}/${meta.variantcaller}" }, - pattern: "*{gz,gz.tbi}", - saveAs: { params.tools.contains('snpeff') ? it : null } - ] - } + withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_SNPEFF:TABIX_BGZIPTABIX' { + ext.prefix = {"${meta.id}_snpEff.ann.vcf"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.id}/${meta.variantcaller}" }, + pattern: "*{gz,gz.tbi}", + saveAs: { params.tools.contains('snpeff') ? 
it : null } + ] } -} -if (params.tools && (params.tools.contains('vep'))) { - process { - withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_ENSEMBLVEP:ANNOTATION_BGZIPTABIX' { - ext.prefix = {"${meta.id}_VEP.ann.vcf"} - } + withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_ENSEMBLVEP:TABIX_BGZIPTABIX' { + ext.prefix = {"${meta.id}_VEP.ann.vcf"} } -} -if (params.tools && (params.tools.contains('merge'))) { - process { - withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_MERGE:ANNOTATION_BGZIPTABIX' { - ext.prefix = {"${meta.id}_snpEff_VEP.ann.vcf"} - } + withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_MERGE:TABIX_BGZIPTABIX' { + ext.prefix = {"${meta.id}_snpEff_VEP.ann.vcf"} } -} -process { + // MULTIQC + withName:'MULTIQC' { errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} ext.args = { params.multiqc_config ? "--config $multiqc_custom_config" : "" } } } - -// process { - // withName: CUSTOM_DUMPSOFTWAREVERSIONS { - // publishDir = [ - // mode: params.publish_dir_mode, - // path: { "${params.outdir}/pipeline_info" }, - // pattern: '*_versions.yml' -// } diff --git a/conf/test.config b/conf/test.config index 5e6a864bdc..40560d918b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -9,6 +9,12 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +try { + includeConfig "https://raw.githubusercontent.com/nf-core/modules/master/tests/config/test_data.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/modules test data config") +} + params { config_profile_name = 'Test profile' @@ -23,30 +29,26 @@ params { input = "${baseDir}/tests/csv/3.0/fastq_single.csv" // Small reference genome + genome = null igenomes_ignore = true - genome = 'small_hg38' - genomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules' - - dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" - fasta = 
"${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta" - germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" - intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.interval_list" - known_indels = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" - nucleotides_per_second = 20 - + dbsnp = params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'] + fasta = params.test_data['homo_sapiens']['genome']['genome_fasta'] + germline_resource = params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'] + intervals = params.test_data['homo_sapiens']['genome']['genome_interval_list'] + known_indels = params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'] snpeff_db = 'WBcel235.99' + snpeff_genome = 'WBcel235' + vep_cache_version = 104 + vep_genome = 'WBcel235' vep_species = 'caenorhabditis_elegans' - vep_cache_version = '104' - // Ignore `--input` as otherwise the parameter validation will throw an error - schema_ignore_params = 'genomes,input' + // Ignore params that will throw warning through params validation + schema_ignore_params = "genomes,test_data" } profiles { annotation { - params.genome = 'WBcel235' - params.igenomes_ignore = false - params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-vcf-https.csv' + params.input = "${baseDir}/tests/csv/3.0/vcf_single.csv" params.step = 'annotate' } no_intervals { @@ -93,60 +95,60 @@ profiles { params.save_split_fastqs = true } targeted { - params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.multi_intervals.bed" + params.intervals = params.test_data['homo_sapiens']['genome']['genome_multi_interval_bed'] params.wes = true params.nucleotides_per_second = 20 } tools { params.input = "${baseDir}/tests/csv/3.0/recalibrated.csv" - params.dbsnp = 
"${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" - params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" - params.germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz" - params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" - params.pon = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" + params.dbsnp = params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'] + params.fasta = params.test_data['homo_sapiens']['genome']['genome_21_fasta'] + params.germline_resource = params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'] + params.intervals = params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'] + params.pon = params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'] params.step = 'variant_calling' params.joint_germline = true params.wes = true - params.genome = 'WBcel235' - params.vep_genome = 'WBcel235' + + params.nucleotides_per_second = 20 } tools_germline { params.input = "${baseDir}/tests/csv/3.0/recalibrated_germline.csv" - params.dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" - params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" - params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + params.dbsnp = params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'] + params.fasta = params.test_data['homo_sapiens']['genome']['genome_21_fasta'] + params.intervals = params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'] params.step = 'variant_calling' params.joint_germline = true params.wes = true - 
params.genome = 'WBcel235' - params.vep_genome = 'WBcel235' + + params.nucleotides_per_second = 20 } tools_tumoronly { params.input = "${baseDir}/tests/csv/3.0/recalibrated_tumoronly.csv" - params.dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" - params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" - params.germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz" - params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" - params.pon = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" + params.dbsnp = params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'] + params.fasta = params.test_data['homo_sapiens']['genome']['genome_21_fasta'] + params.germline_resource = params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'] + params.intervals = params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'] + params.pon = params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'] params.step = 'variant_calling' params.joint_germline = true params.wes = true - params.genome = 'WBcel235' - params.vep_genome = 'WBcel235' + + params.nucleotides_per_second = 20 } tools_somatic { params.input = "${baseDir}/tests/csv/3.0/recalibrated_somatic.csv" - params.dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" - params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" - params.germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz" - params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" - 
params.pon = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" + params.chr_dir = params.test_data['homo_sapiens']['genome']['genome_21_chromosomes_dir'] + params.dbsnp = params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'] + params.fasta = params.test_data['homo_sapiens']['genome']['genome_21_fasta'] + params.germline_resource = params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'] + params.intervals = params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'] + params.pon = params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'] params.step = 'variant_calling' params.joint_germline = true params.wes = true - params.genome = 'WBcel235' - params.vep_genome = 'WBcel235' - params.chr_dir = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz" + + params.nucleotides_per_second = 20 } trimming { params.clip_r1 = 1 @@ -164,19 +166,11 @@ profiles { } variantcalling_channels { params.input = "${baseDir}/tests/csv/3.0/recalibrated.csv" - params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + params.fasta = params.test_data['homo_sapiens']['genome']['genome_21_fasta'] + params.intervals = params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'] params.wes = true params.step = 'variant_calling' - params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" - } -} -//This is apparently useless as it won't overwrite things in the modules.config -process { - withName:ENSEMBLVEP { - maxForks = 1 - } - withName:SNPEFF { - maxForks = 1 + params.nucleotides_per_second = 20 } } diff --git a/main.nf b/main.nf index 7858f4c5d3..2d5788f04d 100644 --- a/main.nf +++ b/main.nf @@ -45,6 +45,7 @@ params.known_indels = WorkflowMain.getGenomeAttribute(params, 'known_in params.known_indels_tbi = 
WorkflowMain.getGenomeAttribute(params, 'known_indels_tbi') params.mappability = WorkflowMain.getGenomeAttribute(params, 'mappability') params.snpeff_db = WorkflowMain.getGenomeAttribute(params, 'snpeff_db') +params.snpeff_genome = WorkflowMain.getGenomeAttribute(params, 'snpeff_genome') params.vep_cache_version = WorkflowMain.getGenomeAttribute(params, 'vep_cache_version') params.vep_genome = WorkflowMain.getGenomeAttribute(params, 'vep_genome') params.vep_species = WorkflowMain.getGenomeAttribute(params, 'vep_species') diff --git a/modules.json b/modules.json index 0aaca9c814..d792373f86 100644 --- a/modules.json +++ b/modules.json @@ -61,7 +61,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "ensemblvep": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "40dd662fd26c3eb3160b7c8cbbe9bff80bbe2c30" }, "fastqc": { "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" @@ -202,7 +202,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "snpeff": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "40dd662fd26c3eb3160b7c8cbbe9bff80bbe2c30" }, "strelka/germline": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" diff --git a/modules/nf-core/modules/ensemblvep/Dockerfile b/modules/nf-core/modules/ensemblvep/Dockerfile index ac1b469117..b4a1c66471 100644 --- a/modules/nf-core/modules/ensemblvep/Dockerfile +++ b/modules/nf-core/modules/ensemblvep/Dockerfile @@ -8,13 +8,14 @@ LABEL \ COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-vep-104.3/bin:$PATH - # Setup default ARG variables ARG GENOME=GRCh38 ARG SPECIES=homo_sapiens -ARG VEP_VERSION=99 +ARG VEP_VERSION=104 +ARG VEP_TAG=104.3 + +# Add conda installation dir to PATH (instead of doing 'conda activate') +ENV PATH /opt/conda/envs/nf-core-vep-${VEP_TAG}/bin:$PATH # Download Genome RUN vep_install \ @@ -27,4 +28,4 
@@ RUN vep_install \ --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE # Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-vep-104.3 > nf-core-vep-104.3.yml +RUN conda env export --name nf-core-vep-${VEP_TAG} > nf-core-vep-${VEP_TAG}.yml diff --git a/modules/nf-core/modules/ensemblvep/build.sh b/modules/nf-core/modules/ensemblvep/build.sh index 5fcb91dfe7..650c8704e5 100644 --- a/modules/nf-core/modules/ensemblvep/build.sh +++ b/modules/nf-core/modules/ensemblvep/build.sh @@ -10,11 +10,12 @@ build_push() { VEP_TAG=$4 docker build \ + . \ -t nfcore/vep:${VEP_TAG}.${GENOME} \ - software/vep/. \ --build-arg GENOME=${GENOME} \ --build-arg SPECIES=${SPECIES} \ - --build-arg VEP_VERSION=${VEP_VERSION} + --build-arg VEP_VERSION=${VEP_VERSION} \ + --build-arg VEP_TAG=${VEP_TAG} docker push nfcore/vep:${VEP_TAG}.${GENOME} } diff --git a/modules/nf-core/modules/ensemblvep/main.nf b/modules/nf-core/modules/ensemblvep/main.nf index c2bd055fa2..a5a9b1abcc 100644 --- a/modules/nf-core/modules/ensemblvep/main.nf +++ b/modules/nf-core/modules/ensemblvep/main.nf @@ -13,6 +13,7 @@ process ENSEMBLVEP { val species val cache_version path cache + path extra_files output: tuple val(meta), path("*.ann.vcf"), emit: vcf diff --git a/modules/nf-core/modules/ensemblvep/meta.yml b/modules/nf-core/modules/ensemblvep/meta.yml index cd9c89054a..418bb970d9 100644 --- a/modules/nf-core/modules/ensemblvep/meta.yml +++ b/modules/nf-core/modules/ensemblvep/meta.yml @@ -10,17 +10,6 @@ tools: homepage: https://www.ensembl.org/info/docs/tools/vep/index.html documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html licence: ["Apache-2.0"] -params: - - use_cache: - type: boolean - description: | - Enable the usage of containers with cache - Does not work with conda - - vep_tag: - type: value - description: | - Specify the tag for the container - https://hub.docker.com/r/nfcore/vep/tags input: - meta: type: map @@ -47,6 +36,10 @@ input: 
type: file description: | path to VEP cache (optional) + - extra_files: + type: tuple + description: | + path to file(s) needed for plugins (optional) output: - vcf: type: file diff --git a/modules/nf-core/modules/snpeff/Dockerfile b/modules/nf-core/modules/snpeff/Dockerfile index 608716a4de..d0e347573c 100644 --- a/modules/nf-core/modules/snpeff/Dockerfile +++ b/modules/nf-core/modules/snpeff/Dockerfile @@ -8,15 +8,16 @@ LABEL \ COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-snpeff-5.0/bin:$PATH - # Setup default ARG variables ARG GENOME=GRCh38 ARG SNPEFF_CACHE_VERSION=99 +ARG SNPEFF_TAG=5.0 + +# Add conda installation dir to PATH (instead of doing 'conda activate'); SNPEFF_TAG must match the conda env name suffix (nf-core-snpeff-5.0 by default) +ENV PATH /opt/conda/envs/nf-core-snpeff-${SNPEFF_TAG}/bin:$PATH # Download Genome RUN snpEff download -v ${GENOME}.${SNPEFF_CACHE_VERSION} # Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-snpeff-5.0 > nf-core-snpeff-5.0.yml +RUN conda env export --name nf-core-snpeff-${SNPEFF_TAG} > nf-core-snpeff-${SNPEFF_TAG}.yml diff --git a/modules/nf-core/modules/snpeff/build.sh b/modules/nf-core/modules/snpeff/build.sh index b94ffd6905..2fccf9a8b4 100644 --- a/modules/nf-core/modules/snpeff/build.sh +++ b/modules/nf-core/modules/snpeff/build.sh @@ -9,10 +9,11 @@ build_push() { SNPEFF_TAG=$3 docker build \ + . \ -t nfcore/snpeff:${SNPEFF_TAG}.${GENOME} \ - software/snpeff/. 
\ --build-arg GENOME=${GENOME} \ - --build-arg SNPEFF_CACHE_VERSION=${SNPEFF_CACHE_VERSION} + --build-arg SNPEFF_CACHE_VERSION=${SNPEFF_CACHE_VERSION} \ + --build-arg SNPEFF_TAG=${SNPEFF_TAG} docker push nfcore/snpeff:${SNPEFF_TAG}.${GENOME} } diff --git a/modules/nf-core/modules/snpeff/meta.yml b/modules/nf-core/modules/snpeff/meta.yml index c191b9acd8..2f0d866eeb 100644 --- a/modules/nf-core/modules/snpeff/meta.yml +++ b/modules/nf-core/modules/snpeff/meta.yml @@ -10,18 +10,6 @@ tools: homepage: https://pcingola.github.io/SnpEff/ documentation: https://pcingola.github.io/SnpEff/se_introduction/ licence: ["MIT"] -params: - - use_cache: - type: boolean - description: | - boolean to enable the usage of containers with cache - Enable the usage of containers with cache - Does not work with conda - - snpeff_tag: - type: value - description: | - Specify the tag for the container - https://hub.docker.com/r/nfcore/snpeff/tags input: - meta: type: map diff --git a/nextflow.config b/nextflow.config index 32c35ad057..3c98b3ebb8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,10 +14,9 @@ params { step = 'mapping' // Starts with mapping // Genome and references options - genome = 'GRCh38' + genome = 'GATK.GRCh38' igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false - genomes_base = null // Disabled by default save_reference = false // Built references not saved // Main options @@ -70,14 +69,17 @@ params { joint_germline = false // // Annotation - annotate_tools = null // Only with --step annotate + vep_dbnsfp = null // dbnsfp plugin disabled within VEP + dbnsfp = null // No dbnsfp processed file + dbnsfp_tbi = null // No dbnsfp processed file index + vep_loftee = null // loftee plugin disabled within VEP + vep_spliceai = null // spliceai plugin disabled within VEP + spliceai_snv = null // No spliceai_snv file + spliceai_snv_tbi = null // No spliceai_snv file index + spliceai_indel = null // No spliceai_indel file + spliceai_indel_tbi = null // No 
spliceai_indel file index + vep_spliceregion = null // spliceregion plugin disabled within VEP annotation_cache = false // Annotation cache disabled - cadd_cache = null // CADD cache disabled - cadd_indels = null // No CADD InDels file - cadd_indels_tbi = null // No CADD InDels index - cadd_wg_snvs = null // No CADD SNVs file - cadd_wg_snvs_tbi = null // No CADD SNVs index - genesplicer = null // genesplicer disabled within VEP snpeff_cache = null // No directory for snpEff cache vep_cache = null // No directory for VEP cache @@ -220,7 +222,7 @@ trace { } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" } manifest { diff --git a/nextflow_schema.json b/nextflow_schema.json index e15981ca7a..d72264c160 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -17,7 +17,7 @@ "default": "mapping", "fa_icon": "fas fa-play", "description": "Starting step.", - "help_text": "Only one step", + "help_text": "Only one step.", "enum": [ "mapping", "markduplicates", @@ -50,27 +50,27 @@ "main_options": { "title": "Main options", "type": "object", - "description": "Option used for most of the pipeline", + "description": "Option used for most of the pipeline.", "default": "", "properties": { "tools": { "type": "string", "fa_icon": "fas fa-toolbox", "description": "Tools to use for variant calling and/or for annotation.", - "help_text": "Multiple separated with commas.\n\nGermline variant calling can currently only be performed with the following variant callers:\n- FreeBayes, HaplotypeCaller, Manta, mpileup, Strelka, TIDDIT\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- ASCAT, Control-FREEC, FreeBayes, Manta, MSIsensorpro, Mutect2, Strelka\n\nTumor-only somatic variant calling can currently only be performed with the following variant callers:\n- Control-FREEC, Manta, mpileup, Mutect2, TIDDIT\n\nAnnotation is done using 
snpEff, VEP, or even both consecutively.\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted.\n\n\n\n`DNAseq`, `DNAscope` and `TNscope` are only available with `--sentieon`\n\n> **NB** tools can be specified with no concern for case.\n", + "help_text": "Multiple separated with commas.\n\nGermline variant calling can currently only be performed with the following variant callers:\n- FreeBayes, HaplotypeCaller, Manta, mpileup, Strelka, TIDDIT\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- ASCAT, Control-FREEC, FreeBayes, Manta, MSIsensorpro, Mutect2, Strelka\n\nTumor-only somatic variant calling can currently only be performed with the following variant callers:\n- Control-FREEC, Manta, mpileup, Mutect2, TIDDIT\n\nAnnotation is done using snpEff, VEP, or even both consecutively.\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted.\n\n\n\n`DNAseq`, `DNAscope` and `TNscope` are only available with `--sentieon`\n\n> **NB** tools can be specified with no concern for case.", "pattern": "^((ascat|cnvkit|controlfreec|deepvariant|dnascope|dnaseq|freebayes|haplotypecaller|manta|merge|mpileup|msisensorpro|mutect2|snpeff|strelka|tiddit|tnscope|vep)*,?)*$" }, "no_intervals": { "type": "boolean", "fa_icon": "fas fa-ban", "description": "Disable usage of intervals.", - "help_text": "Intervals are part of the genome chopped up, used to speed up preprocessing and variant calling" + "help_text": "Intervals are part of the genome chopped up, used to speed up preprocessing and variant calling." 
}, "nucleotides_per_second": { "type": "number", "fa_icon": "fas fa-clock", "description": "Estimate interval size.", - "help_text": "Intervals are part of the genome chopped up, used to speed up preprocessing and variant calling", + "help_text": "Intervals are part of the genome chopped up, used to speed up preprocessing and variant calling.", "default": 1000 }, "sentieon": { @@ -97,7 +97,7 @@ "wes": { "type": "boolean", "fa_icon": "fas fa-dna", - "description": "Enable when exome or panel data is provided" + "description": "Enable when exome or panel data is provided." } }, "fa_icon": "fas fa-user-cog" @@ -113,22 +113,22 @@ "type": "boolean", "fa_icon": "fas fa-cut", "description": "Run Trim Galore.", - "hidden": true, - "help_text": "Use this to perform adapter trimming with Trim Galore.\ncf [Trim Galore User Guide](https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md)" + "help_text": "Use this to perform adapter trimming with Trim Galore.\ncf [Trim Galore User Guide](https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md).", + "hidden": true }, "clip_r1": { "type": "integer", "default": 0, "fa_icon": "fas fa-cut", "description": "Remove bp from the 5' end of read 1.", - "help_text": "This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end.\n", + "help_text": "This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end.", "hidden": true }, "clip_r2": { "type": "integer", "default": 0, "description": "Remove bp from the 5' end of read 2.", - "help_text": "This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end.\n", + "help_text": "This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end.", "fa_icon": "fas fa-cut", "hidden": true }, @@ -137,7 +137,7 @@ "default": 0, "fa_icon": "fas fa-cut", 
"description": "Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed.", - "help_text": "This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.\n", + "help_text": "This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.", "hidden": true }, "three_prime_clip_r2": { @@ -145,7 +145,7 @@ "default": 0, "fa_icon": "fas fa-cut", "description": "Remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed.", - "help_text": "This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.\n", + "help_text": "This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.", "hidden": true }, "trim_nextseq": { @@ -159,21 +159,21 @@ "save_trimmed": { "type": "boolean", "fa_icon": "fas fa-save", - "description": "Save trimmed FastQ file intermediates", + "description": "Save trimmed FastQ file intermediates.", "hidden": true }, "split_fastq": { "type": "integer", "default": 0, "fa_icon": "fas fa-cut", - "description": "Specify how many reads each split of a FastQ file contains. Set 0 to turn of splitting at all", - "help_text": "Use the the tools seqkit/split2 to split FASTQ file by number of reads", + "description": "Specify how many reads each split of a FastQ file contains. Set 0 to turn off splitting entirely.", + "help_text": "Use the tool seqkit/split2 to split FASTQ files by number of reads.", "hidden": true }, "save_split_fastqs": { "type": "boolean", "fa_icon": "fas fa-vial", - "description": "If set, publishes split fastq files. Intended for testing purposes.", + "description": "If set, publishes split FASTQ files. 
Intended for testing purposes.", "hidden": true }, "umi_read_structure": { @@ -201,20 +201,20 @@ "fa_icon": "fas fa-puzzle-piece", "enum": ["bwa-mem", "bwa-mem2", "dragmap"], "description": "Specify aligner to be used to map reads to reference genome.", - "help_text": "> **WARNING** Current indices for `bwa` in AWS iGenomes are not compatible with `bwa-mem2` and `dragmap`.\n> Use `--bwa=false` to have `Sarek` build them automatically.\n\n> **WARNING** BWA-mem2 is in active development\n> Sarek might not be able to require the right amount of resources for it at the moment\n> We recommend to use pre-built indexes", + "help_text": "> **WARNING** Current indices for `bwa` in AWS iGenomes are not compatible with `bwa-mem2` and `dragmap`.\n> `Sarek` will build them automatically if not provided.\n\n> **WARNING** BWA-mem2 is in active development\n> Sarek might not be able to require the right amount of resources for it at the moment\n> We recommend to use pre-built indexes.", "hidden": true }, "use_gatk_spark": { "type": "string", "fa_icon": "fas fa-forward", - "description": "Tools for which to enable usage of GATK Spark implementation", + "description": "Tools for which to enable usage of GATK Spark implementation.", "help_text": "Multiple separated with commas.\n\n GATK4 BQSR tools are currently only available as Beta release. Use with caution!", "pattern": "^((baserecalibrator|markduplicates)*,?)*$" }, "save_bam_mapped": { "type": "boolean", "fa_icon": "fas fa-download", - "description": "Save Mapped BAMs" + "description": "Save Mapped BAMs." 
} } }, @@ -234,14 +234,14 @@ "type": "number", "fa_icon": "fas fa-bacon", "default": 2, - "hidden": true, - "description": "genome ploidy; In case of doubt, you can set different values and Control-FREEC will select the one that explains most observed CNAs" + "description": "genome ploidy; In case of doubt, you can set different values and Control-FREEC will select the one that explains most observed CNAs.", + "hidden": true }, "ascat_purity": { "type": "number", "fa_icon": "fas fa-wrench", - "description": "Overwrite ASCAT purity", - "help_text": "Requires that `--ascat_ploidy` is set" + "description": "Overwrite ASCAT purity.", + "help_text": "Requires that `--ascat_ploidy` is set." }, "cf_coeff": { "type": "number", @@ -268,54 +268,54 @@ "default": 0, "fa_icon": "fas fa-greater-than", "hidden": true, - "description": "Minimal sequencing quality for a position to be considered in BAF analysis" + "description": "Minimal sequencing quality for a position to be considered in BAF analysis." }, "cf_mincov": { "type": "number", "default": 0, "fa_icon": "fas fa-align-center", "hidden": true, - "description": "Minimal read coverage for a position to be considered in BAF analysis" + "description": "Minimal read coverage for a position to be considered in BAF analysis." }, "cf_window": { "type": "number", "fa_icon": "fas fa-wrench", - "description": "Overwrite Control-FREEC window size", - "help_text": "It is recommended to use a window size of 0 for exome data", + "description": "Overwrite Control-FREEC window size.", + "help_text": "It is recommended to use a window size of 0 for exome data.", "hidden": true }, "joint_germline": { "type": "boolean", "fa_icon": "fas fa-align-justify", - "description": "Enables GATK4 joint germline variant calling, if also haplotypecaller is selected" + "description": "Enables GATK4 joint germline variant calling, if also haplotypecaller is selected." 
}, "generate_gvcf": { "type": "boolean", "fa_icon": "fas fa-copy", - "description": "Generate g.vcf output from GATK HaplotypeCaller" + "description": "Generate g.vcf output from GATK HaplotypeCaller." }, "no_strelka_bp": { "type": "boolean", "fa_icon": "fas fa-ban", - "description": "Will not use Manta candidateSmallIndels for Strelka", - "help_text": "Not recommended by Best Practices" + "description": "Will not use Manta candidateSmallIndels for Strelka.", + "help_text": "Not recommended by Best Practices." }, "pon": { "type": "string", "fa_icon": "fas fa-file", - "description": "Panel-of-normals VCF (bgzipped) for GATK Mutect2 / Sentieon TNscope", + "description": "Panel-of-normals VCF (bgzipped) for GATK Mutect2 / Sentieon TNscope.", "help_text": "Without PON, there will be no calls with PASS in the INFO field, only an unfiltered VCF is written.\nIt is recommended to make your own PON, as it depends on sequencer and library preparation.\nFor tests in iGenomes there is a dummy PON file in the Annotation/GermlineResource directory, but it should not be used as a real PON file.\n\nSee [PON documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360042479112-CreateSomaticPanelOfNormals-BETA)\n> **NB** PON file should be bgzipped." }, "pon_tbi": { "type": "string", "fa_icon": "fas fa-file", - "description": "Index of PON panel-of-normals VCF", + "description": "Index of PON panel-of-normals VCF.", "help_text": "If none provided, will be generated automatically from the PON bgzipped VCF file." }, "ignore_soft_clipped_bases": { "type": "boolean", "fa_icon": "fas fa-ban", - "description": "Do not analyze soft clipped bases in the reads for GATK Mutect2", + "description": "Do not analyze soft clipped bases in the reads for GATK Mutect2.", "help_text": "use the `--dont-use-soft-clipped-bases` params with GATK." 
} } @@ -327,69 +327,91 @@ "default": "", "fa_icon": "fas fa-toolbox", "properties": { - "annotate_tools": { + "vep_dbnsfp": { + "type": "boolean", + "fa_icon": "fas fa-database", + "description": "Enable the use of the VEP dbNSFP plugin.", + "hidden": true + }, + "dbnsfp": { "type": "string", - "fa_icon": "fas fa-hammer", - "description": "Specify from which tools Sarek should look for VCF files to annotate", - "help_text": "Only for step `annotate`", - "pattern": "^((haplotypecaller|manta|mutect2|strelka|tiddit)*(,)*)*$", + "fa_icon": "fas fa-database", + "description": "Path to dbNSFP processed file.", + "help_text": "To be used with `--vep_dbnsfp`.", "hidden": true }, - "annotation_cache": { + "dbnsfp_tbi": { + "type": "string", + "fa_icon": "fas fa-database", + "description": "Path to dbNSFP tabix indexed file.", + "help_text": "To be used with `--vep_dbnsfp`.", + "hidden": true + }, + "vep_loftee": { "type": "boolean", "fa_icon": "fas fa-database", - "description": "Enable the use of cache for annotation", - "help_text": "And disable usage of Sarek snpeff and vep specific containers for annotation\n\nTo be used with `--snpeff_cache` and/or `--vep_cache`", + "description": "Enable the use of the VEP LOFTEE plugin.", "hidden": true }, - "cadd_cache": { + "vep_spliceai": { "type": "boolean", "fa_icon": "fas fa-database", - "description": "Enable CADD cache.", + "description": "Enable the use of the VEP SpliceAI plugin.", "hidden": true }, - "cadd_indels": { + "spliceai_snv": { "type": "string", - "fa_icon": "fas fa-file", - "description": "Path to CADD InDels file.", + "fa_icon": "fas fa-database", + "description": "Path to spliceai raw scores snv file.", + "help_text": "To be used with `--vep_spliceai`.", "hidden": true }, - "cadd_indels_tbi": { + "spliceai_snv_tbi": { "type": "string", - "fa_icon": "fas fa-file", - "description": "Path to CADD InDels index.", + "fa_icon": "fas fa-database", + "description": "Path to spliceai raw scores snv tabix indexed file.", 
+ "help_text": "To be used with `--vep_spliceai`.", "hidden": true }, - "cadd_wg_snvs": { + "spliceai_indel": { "type": "string", - "fa_icon": "fas fa-file", - "description": "Path to CADD SNVs file.", + "fa_icon": "fas fa-database", + "description": "Path to spliceai raw scores indel file.", + "help_text": "To be used with `--vep_spliceai`.", "hidden": true }, - "cadd_wg_snvs_tbi": { + "spliceai_indel_tbi": { "type": "string", - "fa_icon": "fas fa-file", - "description": "Path to CADD SNVs index.", + "fa_icon": "fas fa-database", + "description": "Path to spliceai raw scores indel tabix indexed file.", + "help_text": "To be used with `--vep_spliceai`.", + "hidden": true + }, + "vep_spliceregion": { + "type": "boolean", + "fa_icon": "fas fa-database", + "description": "Enable the use of the VEP SpliceRegion plugin.", "hidden": true }, - "genesplicer": { + "annotation_cache": { "type": "boolean", - "fa_icon": "fas fa-gavel", - "description": "Enable the use of the VEP GeneSplicer plugin.", + "fa_icon": "fas fa-database", + "description": "Enable the use of cache for annotation.", + "help_text": "And disable usage of Sarek snpeff and vep specific containers for annotation\n\nTo be used with `--snpeff_cache` and/or `--vep_cache`.", "hidden": true }, "snpeff_cache": { "type": "string", "fa_icon": "fas fa-database", - "description": "Path to snpEff cache", - "help_text": "To be used with `--annotation_cache`", + "description": "Path to snpEff cache.", + "help_text": "To be used with `--annotation_cache`.", "hidden": true }, "vep_cache": { "type": "string", "fa_icon": "fas fa-database", - "description": "Path to VEP cache", - "help_text": "To be used with `--annotation_cache`", + "description": "Path to VEP cache.", + "help_text": "To be used with `--annotation_cache`.", "hidden": true } } @@ -403,8 +425,9 @@ "genome": { "type": "string", "description": "Name of iGenomes reference.", + "default": "GATK.GRCh38", "fa_icon": "fas fa-book", - "help_text": "If using a 
reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.\n" + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, "ac_loci": { "type": "string", @@ -420,13 +443,13 @@ "type": "string", "fa_icon": "fas fa-copy", "description": "Path to BWA mem indices.", - "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference." + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs." }, "bwamem2": { "type": "string", "fa_icon": "fas fa-copy", - "description": "Path to bwamem2 mem indices.", - "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference, if --aligner bwamem-2 is specified." + "description": "Path to bwa-mem2 mem indices.", + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner bwa-mem2` is specified. Combine with `--save_reference` to save for future runs." }, "chr_dir": { "type": "string", @@ -442,19 +465,19 @@ "type": "string", "fa_icon": "fas fa-file", "description": "Path to dbsnp index.", - "help_text": "> **NB** If none provided, will be generated automatically from the dbsnp file." + "help_text": "> **NB** If none provided, will be generated automatically from the dbsnp file. Combine with `--save_reference` to save for future runs." 
}, "dict": { "type": "string", "fa_icon": "fas fa-file", "description": "Path to FASTA dictionary file.", - "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference." + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs." }, "dragmap": { "type": "string", "fa_icon": "fas fa-copy", "description": "Path to dragmap indices.", - "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference, if --aligner dragmap is specified" + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner dragmap` is specified. Combine with `--save_reference` to save for future runs." }, "fasta": { "type": "string", @@ -462,97 +485,98 @@ "mimetype": "text/plain", "pattern": "\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified.", "fa_icon": "far fa-file-code" }, "fasta_fai": { "type": "string", "fa_icon": "fas fa-file", - "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference", + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.", "description": "Path to FASTA reference index." 
}, "germline_resource": { "type": "string", "fa_icon": "fas fa-file", - "description": "Path to GATK Mutect2 Germline Resource File", + "description": "Path to GATK Mutect2 Germline Resource File.", "help_text": "The germline resource VCF file (bgzipped and tabixed) needed by GATK4 Mutect2 is a collection of calls that are likely present in the sample, with allele frequencies.\nThe AF info field must be present.\nYou can find a smaller, stripped gnomAD VCF file (most of the annotation is removed and only calls signed by PASS are stored) in the AWS iGenomes Annotation/GermlineResource folder." }, "germline_resource_tbi": { "type": "string", "fa_icon": "fas fa-file", - "description": "Path to GATK Mutect2 Germline Resource Index", - "help_text": "> **NB** If none provided, will be generated automatically from the Germline Resource file, if provided" + "description": "Path to GATK Mutect2 Germline Resource Index.", + "help_text": "> **NB** If none provided, will be generated automatically from the Germline Resource file, if provided. Combine with `--save_reference` to save for future runs." 
}, "intervals": { "type": "string", "fa_icon": "fas fa-file-alt", - "help_text": "To speed up some preprocessing and variant calling processes, the reference is chopped into smaller pieces.\nThe intervals are chromosomes cut at their centromeres (so each chromosome arm processed separately) also additional unassigned contigs.\nWe are ignoring the `hs37d5` contig that contains concatenated decoy sequences.\nParts of preprocessing and variant calling are done by these intervals, and the different resulting files are then merged.\nThis can parallelize processes, and push down wall clock time significantly.\n\nThe calling intervals can be defined using a .list or a BED file.\nA .list file contains one interval per line in the format `chromosome:start-end` (1-based coordinates).\nA BED file must be a tab-separated text file with one interval per line.\nThere must be at least three columns: chromosome, start, and end (0-based coordinates).\nAdditionally, the score column of the BED file can be used to provide an estimate of how many seconds it will take to call variants on that interval.\nThe fourth column remains unused.\n\n```\n|chr1|10000|207666|NA|47.3|\n```\nThis indicates that variant calling on the interval chr1:10001-207666 takes approximately 47.3 seconds.\n\nThe runtime estimate is used in two different ways.\nFirst, when there are multiple consecutive intervals in the file that take little time to compute, they are processed as a single job, thus reducing the number of processes that needs to be spawned.\nSecond, the jobs with largest processing time are started first, which reduces wall-clock time.\nIf no runtime is given, a time of 1000 nucleotides per second is assumed.\nActual figures vary from 2 nucleotides/second to 30000 nucleotides/second.\nIf you prefer, you can specify the full path to your reference genome when you run the pipeline:\n\n> **NB** If none provided, will be generated automatically from the FASTA reference\n> **NB** Use --no_intervals to 
disable automatic generation", - "description": "Path to intervals file" + "help_text": "To speed up some preprocessing and variant calling processes, the reference is chopped into smaller pieces.\nThe intervals are chromosomes cut at their centromeres (so each chromosome arm processed separately) also additional unassigned contigs.\nWe are ignoring the `hs37d5` contig that contains concatenated decoy sequences.\nParts of preprocessing and variant calling are done by these intervals, and the different resulting files are then merged.\nThis can parallelize processes, and push down wall clock time significantly.\n\nThe calling intervals can be defined using a .list or a BED file.\nA .list file contains one interval per line in the format `chromosome:start-end` (1-based coordinates).\nA BED file must be a tab-separated text file with one interval per line.\nThere must be at least three columns: chromosome, start, and end (0-based coordinates).\nAdditionally, the score column of the BED file can be used to provide an estimate of how many seconds it will take to call variants on that interval.\nThe fourth column remains unused.\n\n```\n|chr1|10000|207666|NA|47.3|\n```\nThis indicates that variant calling on the interval chr1:10001-207666 takes approximately 47.3 seconds.\n\nThe runtime estimate is used in two different ways.\nFirst, when there are multiple consecutive intervals in the file that take little time to compute, they are processed as a single job, thus reducing the number of processes that needs to be spawned.\nSecond, the jobs with largest processing time are started first, which reduces wall-clock time.\nIf no runtime is given, a time of 1000 nucleotides per second is assumed.\nActual figures vary from 2 nucleotides/second to 30000 nucleotides/second.\nIf you prefer, you can specify the full path to your reference genome when you run the pipeline:\n\n> **NB** If none provided, will be generated automatically from the FASTA reference\n> **NB** Use 
--no_intervals to disable automatic generation.", + "description": "Path to intervals file." }, "known_indels": { "type": "string", "fa_icon": "fas fa-copy", - "description": "Path to known indels file" + "description": "Path to known indels file." }, "known_indels_tbi": { "type": "string", "fa_icon": "fas fa-copy", - "description": "Path to known indels file index", - "help_text": "> **NB** If none provided, will be generated automatically from the known index file, if provided" + "description": "Path to known indels file index.", + "help_text": "> **NB** If none provided, will be generated automatically from the known indels file, if provided. Combine with `--save_reference` to save for future runs." }, "mappability": { "type": "string", "fa_icon": "fas fa-file", - "description": "Path to Control-FREEC mappability file" + "description": "Path to Control-FREEC mappability file." }, "snpeff_db": { "type": "string", "fa_icon": "fas fa-database", - "description": "snpEff DB version" + "description": "snpEff DB version." + }, + "snpeff_genome": { + "type": "string", + "fa_icon": "fas fa-microscope", + "description": "snpEff genome.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, "vep_genome": { "type": "string", "fa_icon": "fas fa-microscope", - "description": "VEP genome", - "help_text": "If you use AWS iGenomes or a local resource with genomes.conf, this has already been set for you appropriately." + "description": "VEP genome.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, "vep_species": { "type": "string", "fa_icon": "fas fa-microscope", - "description": "VEP species", - "help_text": "If you use AWS iGenomes or a local resource with genomes.conf, this has already been set for you appropriately." + "description": "VEP species.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." 
}, "vep_cache_version": { - "type": "string", + "type": "number", "fa_icon": "fas fa-tag", - "description": "VEP cache version" + "description": "VEP cache version.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, "save_reference": { "type": "boolean", "fa_icon": "fas fa-download", - "description": "Save built references" + "description": "Save built references." }, "igenomes_base": { "type": "string", "format": "directory-path", "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", + "default": "s3://ngi-igenomes/igenomes/", "fa_icon": "fas fa-cloud-download-alt" }, - "genomes_base": { - "type": "string", - "fa_icon": "fas fa-map-marker-alt", - "description": "Directory / URL base for genomes references.", - "help_text": "All files are supposed to be in the same folder" - }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", - "help_text": "Do not load `igenomes.config` when running the pipeline.\nYou may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.\nThis option will load the `genomes.config` file instead.\n\n> **NB** You can then specify the genome custom and specify at least a FASTA genome file." + "help_text": "Do not load `igenomes.config` when running the pipeline.\nYou may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.\n\n> **NB** You can then run `Sarek` by specifying at least a FASTA genome file." 
} }, - "help_text": "The pipeline config files come bundled with paths to the Illumina iGenomes reference index files.\nThe configuration is set up to use the AWS-iGenomes resource\ncf https://ewels.github.io/AWS-iGenomes/\n" + "help_text": "The pipeline config files come bundled with paths to the Illumina iGenomes reference index files.\nThe configuration is set up to use the AWS-iGenomes resource\ncf https://ewels.github.io/AWS-iGenomes/." }, "institutional_config_options": { "title": "Institutional config options", @@ -611,7 +635,7 @@ "fa_icon": "fas fa-university", "default": "ILLUMINA", "description": "Sequencing platform information to be added to read group (PL field).", - "help_text": "Default: ILLUMINA. Will be used to create a proper header for further GATK4 downstream analysis", + "help_text": "Default: ILLUMINA. Will be used to create a proper header for further GATK4 downstream analysis.", "hidden": true } } @@ -629,7 +653,7 @@ "default": 16, "fa_icon": "fas fa-microchip", "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`." }, "max_memory": { "type": "string", @@ -638,7 +662,7 @@ "fa_icon": "fas fa-memory", "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`." }, "max_time": { "type": "string", @@ -647,7 +671,7 @@ "fa_icon": "far fa-clock", "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. 
Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`." } } }, diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf index 28ea88d888..a9c386b011 100644 --- a/subworkflows/local/annotate.nf +++ b/subworkflows/local/annotate.nf @@ -16,6 +16,7 @@ workflow ANNOTATE { vep_species vep_cache_version vep_cache + vep_extra_files main: ch_reports = Channel.empty() @@ -32,7 +33,7 @@ workflow ANNOTATE { if (tools.contains('merge')) { vcf_ann_for_merge = ANNOTATION_SNPEFF.out.vcf_tbi.map{ meta, vcf, tbi -> [meta, vcf] } - ANNOTATION_MERGE(vcf_ann_for_merge, vep_genome, vep_species, vep_cache_version, vep_cache) + ANNOTATION_MERGE(vcf_ann_for_merge, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) ch_reports = ch_reports.mix(ANNOTATION_MERGE.out.reports) ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_MERGE.out.vcf_tbi) @@ -40,7 +41,7 @@ workflow ANNOTATE { } if (tools.contains('vep')) { - ANNOTATION_ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache) + ANNOTATION_ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) ch_reports = ch_reports.mix(ANNOTATION_ENSEMBLVEP.out.reports) ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_ENSEMBLVEP.out.vcf_tbi) diff --git a/subworkflows/nf-core/annotation/ensemblvep/main.nf b/subworkflows/nf-core/annotation/ensemblvep/main.nf index f1e326e8ba..4c7d0e3e69 100644 --- a/subworkflows/nf-core/annotation/ensemblvep/main.nf +++ b/subworkflows/nf-core/annotation/ensemblvep/main.nf @@ -2,29 +2,30 @@ // Run VEP to annotate VCF files // -include { ENSEMBLVEP } from '../../../../modules/nf-core/modules/ensemblvep/main' -include { TABIX_BGZIPTABIX as ANNOTATION_BGZIPTABIX } from '../../../../modules/nf-core/modules/tabix/bgziptabix/main' +include { ENSEMBLVEP } from 
'../../../../modules/nf-core/modules/ensemblvep/main' +include { TABIX_BGZIPTABIX } from '../../../../modules/nf-core/modules/tabix/bgziptabix/main' workflow ANNOTATION_ENSEMBLVEP { take: vcf // channel: [ val(meta), vcf ] - vep_genome // value: which genome - vep_species // value: which species - vep_cache_version // value: which cache version - vep_cache // path: path_to_vep_cache (optionnal) + vep_genome // value: genome to use + vep_species // value: species to use + vep_cache_version // value: cache version to use + vep_cache // path: /path/to/vep/cache (optional) + vep_extra_files // channel: [ file1, file2...] (optional) main: ch_versions = Channel.empty() - ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache) - ANNOTATION_BGZIPTABIX(ENSEMBLVEP.out.vcf) + ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + TABIX_BGZIPTABIX(ENSEMBLVEP.out.vcf) // Gather versions of all tools used ch_versions = ch_versions.mix(ENSEMBLVEP.out.versions.first()) - ch_versions = ch_versions.mix(ANNOTATION_BGZIPTABIX.out.versions.first()) + ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions.first()) emit: - vcf_tbi = ANNOTATION_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] - reports = ENSEMBLVEP.out.report // path: *.html - versions = ch_versions // path: versions.yml + vcf_tbi = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] + reports = ENSEMBLVEP.out.report // path: *.html + versions = ch_versions // path: versions.yml } diff --git a/subworkflows/nf-core/annotation/snpeff/main.nf b/subworkflows/nf-core/annotation/snpeff/main.nf index e5a020603e..54bfb9caa2 100644 --- a/subworkflows/nf-core/annotation/snpeff/main.nf +++ b/subworkflows/nf-core/annotation/snpeff/main.nf @@ -2,27 +2,27 @@ // Run SNPEFF to annotate VCF files // -include { SNPEFF } from '../../../../modules/nf-core/modules/snpeff/main' -include { TABIX_BGZIPTABIX as ANNOTATION_BGZIPTABIX } from 
'../../../../modules/nf-core/modules/tabix/bgziptabix/main' +include { SNPEFF } from '../../../../modules/nf-core/modules/snpeff/main' +include { TABIX_BGZIPTABIX } from '../../../../modules/nf-core/modules/tabix/bgziptabix/main' workflow ANNOTATION_SNPEFF { take: - vcf // channel: [ val(meta), vcf ] - snpeff_db // value: version of db to use - snpeff_cache // path: path_to_snpeff_cache (optionnal) + vcf // channel: [ val(meta), vcf ] + snpeff_db // value: db version to use + snpeff_cache // path: /path/to/snpeff/cache (optional) main: ch_versions = Channel.empty() SNPEFF(vcf, snpeff_db, snpeff_cache) - ANNOTATION_BGZIPTABIX(SNPEFF.out.vcf) + TABIX_BGZIPTABIX(SNPEFF.out.vcf) // Gather versions of all tools used ch_versions = ch_versions.mix(SNPEFF.out.versions.first()) - ch_versions = ch_versions.mix(ANNOTATION_BGZIPTABIX.out.versions.first()) + ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions.first()) emit: - vcf_tbi = ANNOTATION_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] - reports = SNPEFF.out.report // path: *.html - versions = ch_versions // path: versions.yml + vcf_tbi = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] + reports = SNPEFF.out.report // path: *.html + versions = ch_versions // path: versions.yml } diff --git a/tests/csv/3.0/vcf_single.csv b/tests/csv/3.0/vcf_single.csv new file mode 100644 index 0000000000..601e72f60f --- /dev/null +++ b/tests/csv/3.0/vcf_single.csv @@ -0,0 +1,2 @@ +patient,sample,vcf +test,test,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/vcf/test.vcf.gz diff --git a/tests/test_annotation.yml b/tests/test_annotation.yml index 0fc517804a..f02bd80586 100644 --- a/tests/test_annotation.yml +++ b/tests/test_annotation.yml @@ -4,28 +4,26 @@ - annotation - snpeff files: - - path: results/annotation/1234N/1234N_snpEff.ann.vcf.gz - - path: results/annotation/1234N/1234N_snpEff.ann.vcf.gz.tbi - - path: 
results/reports/SnpEff/1234N/1234N.csv - # - path: results/multiqc //MultiQC not working (finishes succesfully, but log shows issues between human vcf and annotation) + - path: results/annotation/test/test_snpEff.ann.vcf.gz + - path: results/annotation/test/test_snpEff.ann.vcf.gz.tbi + - path: results/reports/SnpEff/test/test.csv + - path: results/multiqc - name: Run VEP - command: nextflow run main.nf -profile test,annotation,docker --tools vep + command: nextflow run main.nf -profile test,annotation,docker --tools vep --skip_tools multiqc tags: - annotation - vep files: - - path: results/annotation/1234N/1234N_VEP.ann.vcf.gz - - path: results/annotation/1234N/1234N_VEP.ann.vcf.gz.tbi - - path: results/reports/EnsemblVEP/1234N/1234N.summary.html - # - path: results/multiqc //MultiQC not working issues between human vcf and annotation + - path: results/annotation/test/test_VEP.ann.vcf.gz + - path: results/annotation/test/test_VEP.ann.vcf.gz.tbi + - path: results/reports/EnsemblVEP/test/test.summary.html - name: Run snpEff followed by VEP - command: nextflow run main.nf -profile test,annotation,docker --tools merge + command: nextflow run main.nf -profile test,annotation,docker --tools merge --skip_tools multiqc tags: - annotation - merge - snpeff - vep files: - - path: results/annotation/1234N/1234N_snpEff_VEP.ann.vcf.gz - - path: results/annotation/1234N/1234N_snpEff_VEP.ann.vcf.gz.tbi - # - path: results/multiqc //MultiQC not working issues between human vcf and annotation + - path: results/annotation/test/test_snpEff_VEP.ann.vcf.gz + - path: results/annotation/test/test_snpEff_VEP.ann.vcf.gz.tbi diff --git a/workflows/sarek.nf b/workflows/sarek.nf index ad2e161937..68ce35299c 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -15,11 +15,9 @@ def checkPathParamList = [ params.ac_loci_gc, params.bwa, params.bwamem2, - params.cadd_indels, - params.cadd_indels_tbi, - params.cadd_wg_snvs, - params.cadd_wg_snvs_tbi, params.chr_dir, + params.dbnsfp, + 
params.dbnsfp_tbi, params.dbsnp, params.dbsnp_tbi, params.dict, @@ -37,6 +35,10 @@ def checkPathParamList = [ params.pon, params.pon_tbi, params.snpeff_cache, + params.spliceai_indel, + params.spliceai_indel_tbi, + params.spliceai_snv, + params.spliceai_snv_tbi, //params.target_bed, params.vep_cache ] @@ -98,15 +100,29 @@ vep_genome = params.vep_genome ?: Channel.empty() vep_species = params.vep_species ?: Channel.empty() // Initialize files channels based on params, not defined within the params.genomes[params.genome] scope -cadd_indels = params.cadd_indels ? Channel.fromPath(params.cadd_indels).collect() : [] -cadd_indels_tbi = params.cadd_indels_tbi ? Channel.fromPath(params.cadd_indels_tbi).collect() : [] -cadd_wg_snvs = params.cadd_wg_snvs ? Channel.fromPath(params.cadd_wg_snvs).collect() : [] -cadd_wg_snvs_tbi = params.cadd_wg_snvs_tbi ? Channel.fromPath(params.cadd_wg_snvs_tbi).collect() : [] pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.empty() snpeff_cache = params.snpeff_cache ? Channel.fromPath(params.snpeff_cache).collect() : [] //target_bed = params.target_bed ? Channel.fromPath(params.target_bed).collect() : [] vep_cache = params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : [] +vep_extra_files = [] // plain Groovy list (not a channel): stays [] when no plugin files are given, so append with addAll(), never Channel.mix() + +if (params.dbnsfp && params.dbnsfp_tbi) { // dbNSFP file and its index are only added as a pair + vep_extra_files.addAll([ + file(params.dbnsfp, checkIfExists: true), + file(params.dbnsfp_tbi, checkIfExists: true) + ]) +} + +if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) { // all four SpliceAI files are required together + vep_extra_files.addAll([ + file(params.spliceai_indel, checkIfExists: true), + file(params.spliceai_indel_tbi, checkIfExists: true), + file(params.spliceai_snv, checkIfExists: true), + file(params.spliceai_snv_tbi, checkIfExists: true) + ]) +} + // Initialize value channels based on params, not defined within the params.genomes[params.genome] scope umi_read_structure = params.umi_read_structure ? 
"${params.umi_read_structure} ${params.umi_read_structure}" : Channel.empty() @@ -861,7 +877,8 @@ workflow SAREK { vep_genome, vep_species, vep_cache_version, - vep_cache) + vep_cache, + vep_extra_files) // Gather used softwares versions ch_versions = ch_versions.mix(ANNOTATE.out.versions)