diff --git a/CHANGELOG.md b/CHANGELOG.md index 07f67f2258..0944a04b05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- [#1113](https://github.com/nf-core/sarek/pull/1113) - Adding CNVkit genemetrics module - [#1193](https://github.com/nf-core/sarek/pull/1193) - Adding support for Sentieon's DnaScope for germline variant-calling including joint-germline - [#1271](https://github.com/nf-core/sarek/pull/1271) - Back to dev diff --git a/conf/base.config b/conf/base.config index db1175874f..446e88b99b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -70,6 +70,10 @@ process { cpus = { check_max( 24 * task.attempt, 'cpus' ) } memory = { check_max( 30.GB * task.attempt, 'memory' ) } } + withName:'CNVKIT_BATCH' { + label = "process_high" + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + } withName: 'GATK4_MARKDUPLICATES|GATK4_MARKDUPLICATESSPARK' { cpus = { check_max( 6 * task.attempt, 'cpus' ) } memory = { check_max( 30.GB * task.attempt, 'memory' ) } diff --git a/conf/modules/cnvkit.config b/conf/modules/cnvkit.config index f77c6ef446..bf5ff6c6c2 100644 --- a/conf/modules/cnvkit.config +++ b/conf/modules/cnvkit.config @@ -47,4 +47,14 @@ process { pattern: "*{bed,cnn,cnr,cns,pdf,png}" ] } + // CNVKIT + withName: 'CNVKIT_GENEMETRICS' { + ext.prefix = { "${cnr.baseName}.genemetrics" } + ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/cnvkit/${meta.id}/" }, + pattern: "*{tsv}" + ] + } } diff --git a/docs/output.md b/docs/output.md index b03f4c11d0..e82c3c1735 100644 --- a/docs/output.md +++ b/docs/output.md @@ -717,8 +717,9 @@ The file `.cnvs.txt` contains all segments predicte - file containing copy number segment information - `.call.cns` - file containing copy number segment information - - +- `.genemetrics.tsv` + - file containing 
per gene copy number information (if input files are annotated) +
Output files for tumor/normal samples @@ -745,6 +746,8 @@ The file `.cnvs.txt` contains all segments predicte - file containing copy number segment information - `.call.cns` - file containing copy number segment information +- `.genemetrics.tsv` + - file containing per gene copy number information (if input files are annotated)
#### Control-FREEC diff --git a/modules.json b/modules.json index 06ff0b6dab..d5022b6f91 100644 --- a/modules.json +++ b/modules.json @@ -70,6 +70,11 @@ "git_sha": "3b63e1df297ef474b0070aa5fabb30d732173671", "installed_by": ["modules"] }, + "cnvkit/genemetrics": { + "branch": "master", + "git_sha": "3b63e1df297ef474b0070aa5fabb30d732173671", + "installed_by": ["modules"] + }, "cnvkit/reference": { "branch": "master", "git_sha": "3b63e1df297ef474b0070aa5fabb30d732173671", diff --git a/modules/nf-core/cnvkit/genemetrics/main.nf b/modules/nf-core/cnvkit/genemetrics/main.nf new file mode 100755 index 0000000000..6058994866 --- /dev/null +++ b/modules/nf-core/cnvkit/genemetrics/main.nf @@ -0,0 +1,39 @@ +process CNVKIT_GENEMETRICS { + tag "$meta.id" + label 'process_low' + + conda "bioconda::cnvkit=0.9.10 bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.10--pyhdfd78af_0': + 'biocontainers/cnvkit:0.9.10--pyhdfd78af_0' }" + + input: + tuple val(meta), path(cnr), path(cns) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + //tuple val(meta), path("*.cnn"), emit: cnn + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def segments = cns ? 
"--segment ${cns}" : "" + + """ + cnvkit.py \\ + genemetrics \\ + $cnr \\ + $segments \\ + --output ${prefix}.tsv \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/cnvkit/genemetrics/meta.yml b/modules/nf-core/cnvkit/genemetrics/meta.yml new file mode 100755 index 0000000000..115a4a87bb --- /dev/null +++ b/modules/nf-core/cnvkit/genemetrics/meta.yml @@ -0,0 +1,47 @@ +name: cnvkit_genemetrics +description: Copy number variant detection from high-throughput sequencing data +keywords: + - cnvkit + - bam + - fasta + - copy number +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - cnr: + type: file + description: CNR file + pattern: "*.cnr" + - cns: + type: file + description: CNS file [Optional] + pattern: "*.cns" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - tsv: + type: file + description: TSV file + pattern: "*.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@adamrtalbot" + - "@marrip" + - "@priesgo" diff --git a/subworkflows/local/bam_variant_calling_cnvkit/main.nf b/subworkflows/local/bam_variant_calling_cnvkit/main.nf index f90a2d6233..c6b93d33c9 100644 --- a/subworkflows/local/bam_variant_calling_cnvkit/main.nf +++ b/subworkflows/local/bam_variant_calling_cnvkit/main.nf @@ -5,6 +5,7 @@ // A when clause condition is defined in the conf/modules.config to determine if the module should be run include { CNVKIT_BATCH } from '../../../modules/nf-core/cnvkit/batch/main' +include { CNVKIT_GENEMETRICS } from '../../../modules/nf-core/cnvkit/genemetrics/main' workflow BAM_VARIANT_CALLING_CNVKIT { take: @@ -15,11 +16,16 @@ workflow BAM_VARIANT_CALLING_CNVKIT { reference // channel: [] cnn main: + versions = Channel.empty() generate_pon = false CNVKIT_BATCH(cram, fasta, fasta_fai, targets, reference, generate_pon) - versions = CNVKIT_BATCH.out.versions + ch_genemetrics = CNVKIT_BATCH.out.cnr.join(CNVKIT_BATCH.out.cns).map{ meta, cnr, cns -> [meta, cnr, cns[2]]} + CNVKIT_GENEMETRICS(ch_genemetrics) + + versions = versions.mix(CNVKIT_BATCH.out.versions) + versions = versions.mix(CNVKIT_GENEMETRICS.out.versions) emit: versions // channel: [ versions.yml ]