diff --git a/modules/nf-core/glimpse/chunk/meta.yml b/modules/nf-core/glimpse/chunk/meta.yml index 9e77ddeb1da..9ac840a6e39 100644 --- a/modules/nf-core/glimpse/chunk/meta.yml +++ b/modules/nf-core/glimpse/chunk/meta.yml @@ -3,6 +3,7 @@ description: Defines chunks where to run imputation keywords: - chunk - imputation + - low coverage tools: - "glimpse": description: "GLIMPSE is a phasing and imputation method for large-scale low-coverage sequencing studies." diff --git a/modules/nf-core/glimpse/phase/main.nf b/modules/nf-core/glimpse/phase/main.nf index b936f0bd08c..894d8f1ec6a 100644 --- a/modules/nf-core/glimpse/phase/main.nf +++ b/modules/nf-core/glimpse/phase/main.nf @@ -8,7 +8,7 @@ process GLIMPSE_PHASE { 'quay.io/biocontainers/glimpse-bio:1.1.1--hce55b13_1' }" input: - tuple val(meta) , path(input), path(input_index), val(input_region), val(output_region), path(reference), path(reference_index), path(map), path(samples_file) + tuple val(meta) , path(input), path(input_index), path(samples_file), val(input_region), val(output_region), path(reference), path(reference_index), path(map) output: tuple val(meta), path("*.{vcf,bcf,vcf.gz,bcf.gz}"), emit: phased_variant diff --git a/modules/nf-core/glimpse/phase/meta.yml b/modules/nf-core/glimpse/phase/meta.yml index b17567de76d..47da4de6ec6 100644 --- a/modules/nf-core/glimpse/phase/meta.yml +++ b/modules/nf-core/glimpse/phase/meta.yml @@ -3,6 +3,9 @@ description: main GLIMPSE algorithm, performs phasing and imputation refining ge keywords: - phase - imputation + - low-coverage + - glimpse + tools: - "glimpse": description: "GLIMPSE is a phasing and imputation method for large-scale low-coverage sequencing studies." @@ -29,6 +32,15 @@ input: description: Index file of the input VCF/BCF file containing genotype likelihoods. pattern: "*.{vcf.gz.csi,bcf.gz.csi}" + - samples_file: + type: file + description: | + File with sample names and ploidy information. 
+ One sample per line with a mandatory second column indicating ploidy (1 or 2). + Sample names that are not present are assumed to have ploidy 2 (diploids). + GLIMPSE does NOT handle the use of sex (M/F) instead of ploidy. + pattern: "*.{txt,tsv}" + - input_region: type: string description: Target region used for imputation, including left and right buffers (e.g. chr20:1000000-2000000). @@ -54,15 +66,6 @@ input: description: File containing the genetic map. pattern: "*.gmap" - - samples_file: - type: file - description: | - File with sample names and ploidy information. - One sample per line with a mandatory second column indicating ploidy (1 or 2). - Sample names that are not present are assumed to have ploidy 2 (diploids). - GLIMPSE does NOT handle the use of sex (M/F) instead of ploidy. - pattern: "*.{txt,tsv}" - output: - meta: type: map diff --git a/modules/nf-core/glimpse2/chunk/main.nf b/modules/nf-core/glimpse2/chunk/main.nf index bfb7e440bd4..a6b0011d51f 100644 --- a/modules/nf-core/glimpse2/chunk/main.nf +++ b/modules/nf-core/glimpse2/chunk/main.nf @@ -17,7 +17,8 @@ process GLIMPSE2_CHUNK { 'quay.io/biocontainers/glimpse-bio:2.0.0--hf340a29_0' }" input: - tuple val(meta), path(input), path(input_index), val(region), path(map) + tuple val(meta) , path(input), path(input_index), val(region) + tuple val(meta2), path(map) val(model) output: diff --git a/modules/nf-core/glimpse2/phase/main.nf b/modules/nf-core/glimpse2/phase/main.nf index 03e6803a3ed..97c332afaaf 100644 --- a/modules/nf-core/glimpse2/phase/main.nf +++ b/modules/nf-core/glimpse2/phase/main.nf @@ -18,8 +18,9 @@ process GLIMPSE2_PHASE { 'quay.io/biocontainers/glimpse-bio:2.0.0--hf340a29_0' }" input: - tuple val(meta), path(input), path(input_index), val(input_region), val(output_region), path(reference), path(reference_index), path(map), path(samples_file) + tuple val(meta) , path(input), path(input_index), path(samples_file), val(input_region), val(output_region), path(reference), 
path(reference_index), path(map) tuple val(meta2), path(fasta_reference), path(fasta_reference_index) + output: tuple val(meta), path("*.{vcf,bcf,bgen}"), emit: phased_variant tuple val(meta), path("*.txt.gz") , emit: stats_coverage, optional: true @@ -29,13 +30,17 @@ process GLIMPSE2_PHASE { task.ext.when == null || task.ext.when script: + def region = input_region ? "${output_region.replace(":","_")}" : "${reference}" def args = task.ext.args ?: "" - def prefix = task.ext.prefix ?: "${meta.id}_${input_region.replace(":","_")}" + def prefix = task.ext.prefix ?: "${meta.id}_${region}" def suffix = task.ext.suffix ?: "bcf" def map_command = map ? "--map $map" : "" def samples_file_command = samples_file ? "--samples-file $samples_file" : "" def fasta_command = fasta_reference ? "--fasta $fasta_reference" : "" + def input_region_cmd = input_region ? "--input-region $input_region" : "" + def output_region_cmd = output_region ? "--output-region $output_region": "" + def input_bam = input.any { it.extension in ["cram","bam"]} """ @@ -54,14 +59,14 @@ process GLIMPSE2_PHASE { $map_command \\ $fasta_command \\ $samples_file_command \\ - --input-region $input_region \\ - --output-region $output_region \\ + $input_region_cmd \\ + $output_region_cmd \\ --thread $task.cpus \\ --output ${prefix}.${suffix} cat <<-END_VERSIONS > versions.yml "${task.process}": - glimpse2: "\$(GLIMPSE2_split_reference --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" + glimpse2: "\$(GLIMPSE2_phase --help | sed -nr '/Version/p' | grep -o -E '([0-9]+.){1,2}[0-9]' | head -1)" END_VERSIONS """ } diff --git a/modules/nf-core/glimpse2/phase/meta.yml b/modules/nf-core/glimpse2/phase/meta.yml index 6ba4933e3bf..5e5e88cbbbc 100644 --- a/modules/nf-core/glimpse2/phase/meta.yml +++ b/modules/nf-core/glimpse2/phase/meta.yml @@ -22,9 +22,10 @@ input: e.g. 
[ id:'test', single_end:false ] - input: - type: files + type: file description: | - Either multiple BAM/CRAM files containing low-coverage sequencing reads or one VCF/BCF file containing the genotype likelihoods. When using BAM/CRAM the name of the file is used as samples name. + Either multiple BAM/CRAM files containing low-coverage sequencing reads or one VCF/BCF file containing the genotype likelihoods. + When using BAM/CRAM the name of the file is used as samples name. pattern: "*.{bam,cram,vcf,vcf.gz,bcf,bcf.gz}" - input_index: @@ -32,14 +33,27 @@ input: description: Index file of the input BAM/CRAM/VCF/BCF file. pattern: "*.{bam.bai,cram.crai,vcf.gz.csi,bcf.gz.csi}" + - samples_file: + type: file + description: | + File with sample names and ploidy information. + One sample per line with a mandatory second column indicating ploidy (1 or 2). + Sample names that are not present are assumed to have ploidy 2 (diploids). + GLIMPSE does NOT handle the use of sex (M/F) instead of ploidy. + pattern: "*.{txt,tsv}" + - input_region: type: string - description: Target region used for imputation, including left and right buffers (e.g. chr20:1000000-2000000). + description: | + Target region used for imputation, including left and right buffers (e.g. chr20:1000000-2000000). + Optional if reference panel is in bin format. pattern: "chrXX:leftBufferPosition-rightBufferPosition" - output_region: type: string - description: Target imputed region, excluding left and right buffers (e.g. chr20:1000000-2000000). + description: | + Target imputed region, excluding left and right buffers (e.g. chr20:1000000-2000000). + Optional if reference panel is in bin format. pattern: "chrXX:leftBufferPosition-rightBufferPosition" - reference: @@ -53,27 +67,24 @@ input: pattern: "*.{vcf.gz.csi,bcf.gz.csi}" - map: - type: file - description: File containing the genetic map. - pattern: "*.gmap" - - - samples_file: type: file description: | - File with sample names and ploidy information. 
One sample per line with a mandatory second column indicating ploidy (1 or 2). Sample names that are not present are assumed to have ploidy 2 (diploids). GLIMPSE does NOT handle the use of sex (M/F) instead of ploidy. - pattern: "*.{txt,tsv}" + File containing the genetic map. + Optional if reference panel is in bin format. + pattern: "*.gmap" - fasta_reference: type: file description: | Faidx-indexed reference sequence file in the appropriate genome build. - Necessary for CRAM files + Necessary for CRAM files. pattern: "*.fasta" - fasta_reference_index: type: file description: | Faidx index of the reference sequence file in the appropriate genome build. + Necessary for CRAM files. pattern: "*.fai" output: diff --git a/modules/nf-core/glimpse2/splitreference/main.nf b/modules/nf-core/glimpse2/splitreference/main.nf index a86061d47d1..45c6a812cf7 100644 --- a/modules/nf-core/glimpse2/splitreference/main.nf +++ b/modules/nf-core/glimpse2/splitreference/main.nf @@ -31,7 +31,7 @@ process GLIMPSE2_SPLITREFERENCE { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}_${output_region.replace(":","_")}" def map_command = map ? 
"--map $map" : "" """ diff --git a/subworkflows/nf-core/multiple_impute_glimpse2/main.nf b/subworkflows/nf-core/multiple_impute_glimpse2/main.nf new file mode 100644 index 00000000000..b300f771246 --- /dev/null +++ b/subworkflows/nf-core/multiple_impute_glimpse2/main.nf @@ -0,0 +1,73 @@ +include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk/main' +include { GLIMPSE2_SPLITREFERENCE } from '../../../modules/nf-core/glimpse2/splitreference/main' +include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase/main' +include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main' +include { BCFTOOLS_INDEX as INDEX_PHASE } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX as INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index/main.nf' + +workflow MULTIPLE_IMPUTE_GLIMPSE2 { + + take: + ch_input // channel (mandatory): [ meta, vcf, csi, infos ] + ch_ref // channel (mandatory): [ meta, vcf, csi, region ] + ch_map // channel (optional): [ meta, map ] + ch_fasta // channel (optional): [ meta, fasta, index ] + chunk_model // string: model used to chunk the reference panel + + main: + + ch_versions = Channel.empty() + + // Chunk reference panel + GLIMPSE2_CHUNK ( ch_ref, ch_map, chunk_model ) + ch_versions = ch_versions.mix( GLIMPSE2_CHUNK.out.versions.first() ) + + chunk_output = GLIMPSE2_CHUNK.out.chunk_chr + .splitCsv(header: ['ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm', + 'WindowMb', 'NbTotVariants', 'NbComVariants'], + sep: "\t", skip: 0) + .map { meta, it -> [meta, it["RegionBuf"], it["RegionCnk"]]} + + // Split reference panel in bin files + split_input = ch_ref.map{ meta, ref, index, region -> [meta, ref, index]} + .combine(chunk_output, by: 0) + + GLIMPSE2_SPLITREFERENCE( split_input, ch_map ) + ch_versions = ch_versions.mix( GLIMPSE2_SPLITREFERENCE.out.versions.first() ) + + phase_input = ch_input.combine( GLIMPSE2_SPLITREFERENCE.out.bin_ref ) + .map{ input_meta, input_file, 
input_index, input_infos, + panel_meta, panel_bin -> + [input_meta, input_file, input_index, input_infos, + [], [], panel_bin, [], []] + }/* Remove unnecessary meta maps + add null index as we use a bin file, + add null value for input and output region as we use a bin file */ + + // Phase input files for each reference bin files + indexing + GLIMPSE2_PHASE ( phase_input, ch_fasta ) // [meta, vcf, index, sample_infos, regionin, regionout, ref, ref_index, map], [ meta, fasta, index ] + ch_versions = ch_versions.mix( GLIMPSE2_PHASE.out.versions.first() ) + + INDEX_PHASE ( GLIMPSE2_PHASE.out.phased_variant ) + ch_versions = ch_versions.mix( INDEX_PHASE.out.versions.first() ) + + // Ligate all phased files of each sample (bcf and csi lists paired on meta) and index the result + ligate_input = GLIMPSE2_PHASE.out.phased_variant + .groupTuple() + .combine( INDEX_PHASE.out.csi + .groupTuple(), + by: 0 ) + + GLIMPSE2_LIGATE ( ligate_input ) + ch_versions = ch_versions.mix( GLIMPSE2_LIGATE.out.versions.first() ) + + INDEX_LIGATE ( GLIMPSE2_LIGATE.out.merged_variants ) + ch_versions = ch_versions.mix( INDEX_LIGATE.out.versions.first() ) + + emit: + chunk_chr = GLIMPSE2_CHUNK.out.chunk_chr // channel: [ val(meta), txt ] + merged_variants = GLIMPSE2_LIGATE.out.merged_variants // channel: [ val(meta), bcf ] + merged_variants_index = INDEX_LIGATE.out.csi // channel: [ val(meta), csi ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/multiple_impute_glimpse2/meta.yml b/subworkflows/nf-core/multiple_impute_glimpse2/meta.yml new file mode 100644 index 00000000000..4f233281b1e --- /dev/null +++ b/subworkflows/nf-core/multiple_impute_glimpse2/meta.yml @@ -0,0 +1,72 @@ +name: "multiple_impute_glimpse2" +description: Impute VCF/BCF files, but also CRAM and BAM files with Glimpse2 +keywords: + - glimpse + - chunk + - phase + - ligate + - split_reference + +modules: + - glimpse2/chunk + - glimpse2/phase + - glimpse2/ligate + - glimpse2/splitreference + - 
bcftools/index + +input: + - ch_input: + type: file + description: | + Target dataset in CRAM, BAM or VCF/BCF format. + Index file of the input file. + File with sample names and ploidy information. + Structure: [ meta, file, index, txt ] + + - ch_ref: + type: file + description: | + Reference panel of haplotypes in VCF/BCF format. + Index file of the Reference panel file. + Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20). + The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended). + Structure: [ meta, vcf, csi, region ] + + - ch_map: + type: file + description: | + File containing the genetic map. + Structure: [ meta, gmap ] + + - ch_fasta: + type: file + description: | + Reference genome in fasta format. + Reference genome index in fai format + Structure: [ meta, fasta, fai ] + +output: + - chunk_chr: + type: file + description: | + Tab delimited output txt file containing buffer and imputation regions. + Structure: [meta, txt] + + - merged_variants: + type: file + description: | + Output VCF/BCF file for the merged regions. + Phased information (HS field) is updated accordingly for the full region. + Structure: [ val(meta), bcf ] + + - merged_variants_index: + type: file + description: Index file of the ligated phased variants files. 
+ + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@LouisLeNezet" diff --git a/subworkflows/nf-core/vcf_impute_glimpse/main.nf b/subworkflows/nf-core/vcf_impute_glimpse/main.nf index ce3ce7eedc4..d5d9b032220 100644 --- a/subworkflows/nf-core/vcf_impute_glimpse/main.nf +++ b/subworkflows/nf-core/vcf_impute_glimpse/main.nf @@ -1,46 +1,65 @@ -include { GLIMPSE_CHUNK } from '../../../modules/nf-core/glimpse/chunk/main' -include { GLIMPSE_PHASE } from '../../../modules/nf-core/glimpse/phase/main' -include { GLIMPSE_LIGATE } from '../../../modules/nf-core/glimpse/ligate/main' -include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' +include { GLIMPSE_CHUNK } from '../../../modules/nf-core/glimpse/chunk/main' +include { GLIMPSE_PHASE } from '../../../modules/nf-core/glimpse/phase/main' +include { GLIMPSE_LIGATE } from '../../../modules/nf-core/glimpse/ligate/main' +include { BCFTOOLS_INDEX as INDEX_PHASE } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX as INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index/main.nf' workflow VCF_IMPUTE_GLIMPSE { take: - ch_vcf // channel (mandatory): [ meta, vcf, csi, region, sample ] + ch_vcf // channel (mandatory): [ meta, vcf, csi, sample, region ] ch_ref // channel (mandatory): [ meta, vcf, csi ] - ch_map // channel (optional): path to map - ch_infos // channel (optional): sample infos + ch_map // channel (optional): [meta, map ] main: ch_versions = Channel.empty() - GLIMPSE_CHUNK ( ch_vcf ) + input_chunk = ch_vcf.map{ + meta, vcf, csi, sample, region -> + [ meta, vcf, csi, region] + } + + GLIMPSE_CHUNK ( input_chunk ) ch_versions = ch_versions.mix(GLIMPSE_CHUNK.out.versions) chunk_output = GLIMPSE_CHUNK.out.chunk_chr .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) - .map { metamap, it -> [metamap, it["RegionIn"], it["RegionOut"]]} - phase_input = 
ch_vcf.map{[it[0], it[1], it[2]]} + .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} + + phase_input = ch_vcf.map{meta, vcf, csi, sample, region -> [meta, vcf, csi, sample]} .join(chunk_output) - .join(ch_ref) - .join(ch_map) - .join(ch_infos) + .combine(ch_ref) + .combine(ch_map) + .map{meta, vcf, csi, sample, + regionin, regionout, + meta_ref, ref, ref_index, + meta_map, map -> + [meta, vcf, csi, sample, regionin, regionout, ref, ref_index, map]} - GLIMPSE_PHASE ( phase_input ) // [meta, vcf, index, regionin, regionout, regionindex, ref, ref_index, map, sample_infos] + GLIMPSE_PHASE ( phase_input ) // [meta, vcf, index, sample_infos, regionin, regionout, ref, ref_index, map] ch_versions = ch_versions.mix(GLIMPSE_PHASE.out.versions.first()) - ligate_input = GLIMPSE_PHASE.out.phased_variant.groupTuple() + INDEX_PHASE ( GLIMPSE_PHASE.out.phased_variant ) + ch_versions = ch_versions.mix( INDEX_PHASE.out.versions.first() ) - BCFTOOLS_INDEX ( ligate_input ) - GLIMPSE_LIGATE ( ligate_input.join(BCFTOOLS_INDEX.out.csi.groupTuple()) ) + // Ligate all phased files of each sample (bcf and csi lists paired on meta) and index the result + ligate_input = GLIMPSE_PHASE.out.phased_variant + .groupTuple() + .combine( INDEX_PHASE.out.csi + .groupTuple(), + by: 0 ) + GLIMPSE_LIGATE ( ligate_input ) ch_versions = ch_versions.mix(GLIMPSE_LIGATE.out.versions.first()) + INDEX_LIGATE ( GLIMPSE_LIGATE.out.merged_variants ) + ch_versions = ch_versions.mix( INDEX_LIGATE.out.versions.first() ) + emit: - chunk_chr = GLIMPSE_CHUNK.out.chunk_chr // channel: [ val(meta), txt ] - merged_variants = GLIMPSE_LIGATE.out.merged_variants // channel: [ val(meta), bcf ] - phased_variants = GLIMPSE_PHASE.out.phased_variant // channel: [ val(meta), bcf ] + chunk_chr = GLIMPSE_CHUNK.out.chunk_chr // channel: [ val(meta), txt ] + merged_variants = GLIMPSE_LIGATE.out.merged_variants // channel: [ val(meta), bcf ] + merged_variants_index = INDEX_LIGATE.out.csi // channel: [ val(meta), csi ] - versions = ch_versions // channel: [ 
versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/vcf_impute_glimpse/meta.yml b/subworkflows/nf-core/vcf_impute_glimpse/meta.yml index 9e0ea49ecfd..3dd30d9313f 100644 --- a/subworkflows/nf-core/vcf_impute_glimpse/meta.yml +++ b/subworkflows/nf-core/vcf_impute_glimpse/meta.yml @@ -11,6 +11,7 @@ modules: - glimpse/chunk - glimpse/phase - glimpse/ligate + - bcftools/index input: - ch_vcf: @@ -18,9 +19,10 @@ input: description: | Target dataset in VCF/BCF format defined at all variable positions. Index file of the input VCF/BCF file containing genotype likelihoods. + File with sample names and ploidy information. Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20). The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended). - Structure: [ meta, vcf, csi, region ] + Structure: [ meta, vcf, csi, txt, region ] - ch_ref: type: file @@ -35,15 +37,6 @@ input: File containing the genetic map. Structure: [gmap] - - ch_infos: - type: file - description: | - File with sample names and ploidy information. - One sample per line with a mandatory second column indicating ploidy (1 or 2). - Sample names that are not present are assumed to have ploidy 2 (diploids). - GLIMPSE does NOT handle the use of sex (M/F) instead of ploidy. 
- Structure: [ txt ] - output: - chunk_chr: type: file diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 5c6af0b59d3..b3020dddda5 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -3464,6 +3464,10 @@ subworkflows/homer/groseq: - subworkflows/nf-core/homer/groseq/** - tests/subworkflows/nf-core/homer/groseq/** +subworkflows/multiple_impute_glimpse2: + - subworkflows/nf-core/multiple_impute_glimpse2/** + - tests/subworkflows/nf-core/multiple_impute_glimpse2/** + subworkflows/vcf_annotate_ensemblvep: - subworkflows/nf-core/vcf_annotate_ensemblvep/** - tests/subworkflows/nf-core/vcf_annotate_ensemblvep/** diff --git a/tests/modules/nf-core/glimpse/chunk/main.nf b/tests/modules/nf-core/glimpse/chunk/main.nf index cb1610e18b0..a481fca8ae7 100644 --- a/tests/modules/nf-core/glimpse/chunk/main.nf +++ b/tests/modules/nf-core/glimpse/chunk/main.nf @@ -6,7 +6,7 @@ include { GLIMPSE_CHUNK } from '../../../../../modules/nf-core/glimpse/chunk/mai workflow test_glimpse_chunk { input = [ - [ id:'test', single_end:false ], // meta map + [ id:'input' ], // meta map file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true), file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true), "chr21"] diff --git a/tests/modules/nf-core/glimpse/chunk/test.yml b/tests/modules/nf-core/glimpse/chunk/test.yml index dfe3f7bc294..bce2b34bc48 100644 --- a/tests/modules/nf-core/glimpse/chunk/test.yml +++ b/tests/modules/nf-core/glimpse/chunk/test.yml @@ -4,6 +4,6 @@ - glimpse - glimpse/chunk files: - - path: output/glimpse/test.txt + - path: output/glimpse/input.txt md5sum: 9e5562b3f94857b8189b59849ce65cfb - path: output/glimpse/versions.yml diff --git a/tests/modules/nf-core/glimpse/concordance/main.nf b/tests/modules/nf-core/glimpse/concordance/main.nf index ad361cf41be..fe25e6874c6 100644 --- 
a/tests/modules/nf-core/glimpse/concordance/main.nf +++ b/tests/modules/nf-core/glimpse/concordance/main.nf @@ -9,13 +9,15 @@ include { BCFTOOLS_INDEX } from '../../../../../modules/nf-core/bcftools/in workflow test_glimpse_concordance { - input_vcf = Channel.of([ - [ id:'input', single_end:false ], // meta map + samples_infos = Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt') + region = Channel.of(["chr21:16600000-16800000","chr21:16650000-16750000"]) + input_vcf = Channel.of([ + [ id:'input' ], // meta map file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true), - "chr21:16600000-16800000", - "chr21:16650000-16750000" + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) ]) + + input_vcf_with_samples_infos = input_vcf.combine(samples_infos).combine(region) ref_panel = Channel.of([ file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), @@ -26,31 +28,34 @@ workflow test_glimpse_concordance { file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), ]) - samples_file = Channel.of('NA12878 2') - .collectFile(name: 'sampleinfos.txt') - GLIMPSE_PHASE ( - input_vcf.combine(ref_panel) - .combine(ch_map) - .combine(samples_file) - ) // [meta, vcf, index, regionin, regionout, regionindex, ref, ref_index, map, sample_infos] + input_vcf_with_samples_infos.combine(ref_panel) + .combine(ch_map) + ) // [meta, vcf, index, sample_infos, regionin, regionout, ref, ref_index, map] ligate_input = GLIMPSE_PHASE.output.phased_variant .groupTuple() BCFTOOLS_INDEX ( ligate_input ) + GLIMPSE_LIGATE ( 
ligate_input.join(BCFTOOLS_INDEX.out.csi.groupTuple()) ) - allele_freq = [file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz",checkIfExists:true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz.csi",checkIfExists:true)] + allele_freq = [ + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz",checkIfExists:true), + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz.csi",checkIfExists:true) + ] - truth = [file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf",checkIfExists:true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf.csi",checkIfExists:true)] + truth = [ + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf",checkIfExists:true), + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf.csi",checkIfExists:true) + ] list_inputs = Channel.of(["chr21", allele_freq[0], truth[0]]) .combine(GLIMPSE_LIGATE.out.merged_variants.map{it[1]}.collect().map{it[0]}) .collect() + concordance_input = Channel.of([[ id:'input', single_end:false ]]).combine(list_inputs) GLIMPSE_CONCORDANCE ( concordance_input, [], [], []) // meta, Region, Frequencies, Truth, Estimate, minPROB, minDP, bins + } diff --git a/tests/modules/nf-core/glimpse/ligate/main.nf b/tests/modules/nf-core/glimpse/ligate/main.nf index 2862f53212a..7ffb7dbbd53 100644 --- a/tests/modules/nf-core/glimpse/ligate/main.nf +++ b/tests/modules/nf-core/glimpse/ligate/main.nf @@ -2,35 +2,40 @@ nextflow.enable.dsl = 2 -include { GLIMPSE_LIGATE } from '../../../../../modules/nf-core/glimpse/ligate/main.nf' -include { GLIMPSE_PHASE } from 
'../../../../../modules/nf-core/glimpse/phase/main.nf' +include { GLIMPSE_LIGATE } from '../../../../../modules/nf-core/glimpse/ligate/main.nf' +include { GLIMPSE_PHASE } from '../../../../../modules/nf-core/glimpse/phase/main.nf' include { BCFTOOLS_INDEX } from '../../../../../modules/nf-core/bcftools/index/main.nf' workflow test_glimpse_ligate { - input_vcf = Channel.of([ - [ id:'input', single_end:false ], // meta map + + samples_infos = Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt') + region = Channel.of(["chr21:16600000-16800000","chr21:16650000-16750000"]) + input_vcf = Channel.of([ + [ id:'input'], // meta map file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true), - "chr21:16600000-16800000", - "chr21:16650000-16750000", + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) ]) + + input_vcf_with_samples_infos = input_vcf.combine(samples_infos).combine(region) + ref_panel = Channel.of([ file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) ]) + ch_map = Channel.of([ file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), ]) - samples_file = Channel.of('NA12878 2') - .collectFile(name: 'sampleinfos.txt') + GLIMPSE_PHASE ( - input_vcf.combine(ref_panel) - .combine(ch_map) - .combine(samples_file) - ) // [meta, vcf, index, regionin, regionout, regionindex, ref, ref_index, map, sample_infos] + input_vcf_with_samples_infos.combine(ref_panel) + .combine(ch_map) + ) // [meta, 
vcf, index, sample_infos, regionin, regionout, regionindex, ref, ref_index, map] ligate_input = GLIMPSE_PHASE.output.phased_variant .groupTuple() + BCFTOOLS_INDEX ( ligate_input ) + GLIMPSE_LIGATE ( ligate_input.join(BCFTOOLS_INDEX.out.csi.groupTuple()) ) } diff --git a/tests/modules/nf-core/glimpse/phase/main.nf b/tests/modules/nf-core/glimpse/phase/main.nf index 152cb332273..bfe5ea37b5f 100644 --- a/tests/modules/nf-core/glimpse/phase/main.nf +++ b/tests/modules/nf-core/glimpse/phase/main.nf @@ -4,14 +4,18 @@ nextflow.enable.dsl = 2 include { GLIMPSE_PHASE } from '../../../../../modules/nf-core/glimpse/phase/main.nf' - input_vcf = Channel.of([ - [ id:'input', single_end:false ], // meta map + samples_infos = Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt') + empty_channel = Channel.of([[]]) + region = Channel.of(["chr21:16600000-16800000","chr21:16650000-16750000"]) + input_vcf = Channel.of([ + [ id:'input'], // meta map file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true), - "chr21:16600000-16800000", - "chr21:16650000-16750000", + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) ]) + input_vcf_with_samples_infos = input_vcf.combine(samples_infos).combine(region) + input_vcf_without_samples_infos = input_vcf.combine(empty_channel).combine(region) + ref_panel = Channel.of([ file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) @@ -21,20 +25,16 @@ include { GLIMPSE_PHASE } from '../../../../../modules/nf-core/glimpse/phase/mai 
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), ]) - samples_file = Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt') - workflow test_glimpse_phase_nosampleinfos { GLIMPSE_PHASE ( - input_vcf.combine(ref_panel) - .combine(ch_map) - .combine(Channel.of([[]])) - ) // [meta, vcf, index, regionin, regionout, regionindex, ref, ref_index, map, sample_infos] + input_vcf_without_samples_infos.combine(ref_panel) + .combine(ch_map) + ) // [meta, vcf, index, sample_infos, regionin, regionout, regionindex, ref, ref_index, map] } workflow test_glimpse_phase_withsampleinfos { GLIMPSE_PHASE ( - input_vcf.combine(ref_panel) - .combine(ch_map) - .combine(samples_file) - ) // [meta, vcf, index, regionin, regionout, regionindex, ref, ref_index, map, sample_infos] + input_vcf_with_samples_infos.combine(ref_panel) + .combine(ch_map) + ) // [meta, vcf, index, sample_infos, regionin, regionout, regionindex, ref, ref_index, map] } diff --git a/tests/modules/nf-core/glimpse/sample/main.nf b/tests/modules/nf-core/glimpse/sample/main.nf index 029ace0c191..093fd16036d 100644 --- a/tests/modules/nf-core/glimpse/sample/main.nf +++ b/tests/modules/nf-core/glimpse/sample/main.nf @@ -8,15 +8,17 @@ include { GLIMPSE_SAMPLE } from '../../../../../modules/nf-core/glimpse/sample/m include { BCFTOOLS_INDEX } from '../../../../../modules/nf-core/bcftools/index/main.nf' workflow test_glimpse_sample { - - input_vcf = Channel.of([ - [ id:'input', single_end:false ], // meta map + + samples_infos = Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt') + region = Channel.of(["chr21:16600000-16800000","chr21:16650000-16750000"]) + input_vcf = Channel.of([ + [ id:'input'], // meta map file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), - 
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true), - "chr21:16600000-16800000", - "chr21:16650000-16750000", + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) ]) + input_vcf_with_samples_infos = input_vcf.combine(samples_infos).combine(region) + ref_panel = Channel.of([ file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) @@ -26,13 +28,10 @@ workflow test_glimpse_sample { file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), ]) - samples_file = Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt') - - GLIMPSE_PHASE ( - input_vcf.combine(ref_panel) - .combine(ch_map) - .combine(Channel.of([[]])) - ) // [meta, vcf, index, regionin, regionout, regionindex, ref, ref_index, map, sample_infos] + GLIMPSE_PHASE ( + input_vcf_with_samples_infos.combine(ref_panel) + .combine(ch_map) + ) // [meta, vcf, index, sample_infos, regionin, regionout, regionindex, ref, ref_index, map] ligate_input = GLIMPSE_PHASE.output.phased_variant .groupTuple() diff --git a/tests/modules/nf-core/glimpse2/chunk/main.nf b/tests/modules/nf-core/glimpse2/chunk/main.nf index 607d6bca23c..177beb1f6d4 100644 --- a/tests/modules/nf-core/glimpse2/chunk/main.nf +++ b/tests/modules/nf-core/glimpse2/chunk/main.nf @@ -4,27 +4,28 @@ nextflow.enable.dsl = 2 include { GLIMPSE2_CHUNK } from '../../../../../modules/nf-core/glimpse2/chunk/main.nf' -workflow test_glimpse2_chunk { - - input = [ +input = [ [ id:'test', single_end:false ], // meta map 
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true), - "chr21", - []] - GLIMPSE2_CHUNK (input, "recursive") + "chr21"] + +workflow test_glimpse2_chunk { + ch_map_empty = Channel.of([ + [ id:'map'], + [] + ]).collect() + + GLIMPSE2_CHUNK (input, ch_map_empty, "recursive") } workflow test_glimpse2_chunk_withmap { - - input = [ - [ id:'test', single_end:false ], // meta map - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true), - "chr21", - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true)] - GLIMPSE2_CHUNK (input, "recursive") + ch_map = Channel.of([ + [ id:'map'], + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) + ]).collect() + GLIMPSE2_CHUNK (input,ch_map,"recursive") } diff --git a/tests/modules/nf-core/glimpse2/concordance/main.nf b/tests/modules/nf-core/glimpse2/concordance/main.nf index 4d2508d3a74..ad78ce739d3 100644 --- a/tests/modules/nf-core/glimpse2/concordance/main.nf +++ b/tests/modules/nf-core/glimpse2/concordance/main.nf @@ -3,20 +3,23 @@ nextflow.enable.dsl = 2 include { GLIMPSE2_PHASE } from '../../../../../modules/nf-core/glimpse2/phase/main.nf' -include { GLIMPSE_LIGATE } from '../../../../../modules/nf-core/glimpse/ligate/main.nf' +include { GLIMPSE_LIGATE } from '../../../../../modules/nf-core/glimpse/ligate/main.nf' include { GLIMPSE2_CONCORDANCE } from '../../../../../modules/nf-core/glimpse2/concordance/main.nf' -include { BCFTOOLS_INDEX } from 
'../../../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX } from '../../../../../modules/nf-core/bcftools/index/main.nf' workflow test_glimpse2_concordance { - - input_vcf = Channel.of([ - [ id:'input', single_end:false ], // meta map + + samples_infos = Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt') + region = Channel.of(["chr21:16600000-16800000","chr21:16650000-16750000"]) + input_vcf = Channel.of([ + [ id:'input'], // meta map file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true), - "chr21:16600000-16800000", - "chr21:16650000-16750000" + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) ]) - + + input_vcf_with_samples_infos = input_vcf.combine(samples_infos).combine(region) + + ref_panel = Channel.of([ file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) @@ -26,32 +29,34 @@ workflow test_glimpse2_concordance { file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), ]) - samples_file = Channel.of('NA12878 2') - .collectFile(name: 'sampleinfos.txt') - GLIMPSE2_PHASE ( - input_vcf.combine(ref_panel) - .combine(ch_map) - .combine(samples_file), + input_vcf_with_samples_infos.combine( ref_panel ) + .combine( ch_map ), Channel.of([[],[],[]]) - ) // [meta, vcf, index, regionin, regionout, regionindex, ref, ref_index, map, sample_infos] - - allele_freq = 
Channel.of([file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz",checkIfExists:true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz.csi",checkIfExists:true)]) - - truth = Channel.of([file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf",checkIfExists:true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf.csi",checkIfExists:true)]) - - BCFTOOLS_INDEX(GLIMPSE2_PHASE.output.phased_variant) + ) // [meta, vcf, index, sample_infos, regionin, regionout, ref, ref_index, map] + + allele_freq = Channel.of([ + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz",checkIfExists:true), + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.sites.vcf.gz.csi",checkIfExists:true) + ]) + + truth = Channel.of([ + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf",checkIfExists:true), + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.bcf.csi",checkIfExists:true) + ]) + + BCFTOOLS_INDEX ( GLIMPSE2_PHASE.output.phased_variant ) + list_inputs = GLIMPSE2_PHASE.output.phased_variant - .join(BCFTOOLS_INDEX.out.csi) - .combine(truth) - .combine(allele_freq) - .combine(Channel.of([[]])) - .combine(Channel.of(["chr21"])) + .join( BCFTOOLS_INDEX.out.csi ) + .combine( truth ) + .combine( allele_freq ) + .combine( Channel.of([[]]) ) + .combine( Channel.of(["chr21"]) ) GLIMPSE2_CONCORDANCE ( list_inputs, Channel.of([[id:"params"],[],"0 0.01 0.05 0.1 0.2 0.5",[],[]]), 0.9999, 8) // [meta, Region, Frequencies, Truth, Estimate], [meta, group, bins, ac_bins, allele_count], min-val-gl, min-val-dp + } diff --git
a/tests/modules/nf-core/glimpse2/concordance/test.yml b/tests/modules/nf-core/glimpse2/concordance/test.yml index 9999229c3e0..f62cb36fcbc 100644 --- a/tests/modules/nf-core/glimpse2/concordance/test.yml +++ b/tests/modules/nf-core/glimpse2/concordance/test.yml @@ -4,12 +4,12 @@ - glimpse2 - glimpse2/concordance files: - - path: output/bcftools/input_chr21_16600000-16800000.bcf.csi + - path: output/bcftools/input_chr21_16650000-16750000.bcf.csi - path: output/bcftools/versions.yml - path: output/glimpse2/input.error.cal.txt.gz - path: output/glimpse2/input.error.grp.txt.gz - path: output/glimpse2/input.error.spl.txt.gz - path: output/glimpse2/input.rsquare.grp.txt.gz - path: output/glimpse2/input.rsquare.spl.txt.gz - - path: output/glimpse2/input_chr21_16600000-16800000.bcf + - path: output/glimpse2/input_chr21_16650000-16750000.bcf - path: output/glimpse2/versions.yml diff --git a/tests/modules/nf-core/glimpse2/ligate/main.nf b/tests/modules/nf-core/glimpse2/ligate/main.nf index 20f1c2bfde3..82e885243ac 100644 --- a/tests/modules/nf-core/glimpse2/ligate/main.nf +++ b/tests/modules/nf-core/glimpse2/ligate/main.nf @@ -7,31 +7,37 @@ include { GLIMPSE2_PHASE } from '../../../../../modules/nf-core/glimpse2/phase/ include { BCFTOOLS_INDEX } from '../../../../../modules/nf-core/bcftools/index/main.nf' workflow test_glimpse2_ligate { - input_vcf = Channel.of([ - [ id:'input', single_end:false ], // meta map + + samples_infos = Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt') + region = Channel.of(["chr21:16600000-16800000","chr21:16650000-16750000"]) + input_vcf = Channel.of([ + [ id:'input'], // meta map file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true), - "chr21:16600000-16800000", - "chr21:16650000-16750000", + 
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) ]) + + input_vcf_with_samples_infos = input_vcf.combine(samples_infos).combine(region) + ref_panel = Channel.of([ file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) ]) + ch_map = Channel.of([ file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true), ]) - samples_file = Channel.of('NA12878 2') - .collectFile(name: 'sampleinfos.txt') + GLIMPSE2_PHASE ( - input_vcf.combine(ref_panel) - .combine(ch_map) - .combine(samples_file), + input_vcf_with_samples_infos.combine( ref_panel ) + .combine( ch_map ), Channel.of([[],[],[]]) - ) // [meta, vcf, index, regionin, regionout, regionindex, ref, ref_index, map, sample_infos] + ) // [meta, vcf, index, sample_infos, regionin, regionout, ref, ref_index, map] ligate_input = GLIMPSE2_PHASE.output.phased_variant .groupTuple() + BCFTOOLS_INDEX ( ligate_input ) + GLIMPSE2_LIGATE ( ligate_input.join(BCFTOOLS_INDEX.out.csi.groupTuple()) ) + } diff --git a/tests/modules/nf-core/glimpse2/ligate/test.yml b/tests/modules/nf-core/glimpse2/ligate/test.yml index d8518751b98..c18143cd298 100644 --- a/tests/modules/nf-core/glimpse2/ligate/test.yml +++ b/tests/modules/nf-core/glimpse2/ligate/test.yml @@ -4,8 +4,8 @@ - glimpse2 - glimpse2/ligate files: - - path: output/bcftools/input_chr21_16600000-16800000.bcf.csi + - path: output/bcftools/input_chr21_16650000-16750000.bcf.csi - path: output/bcftools/versions.yml - path: output/glimpse2/input.vcf.gz - - path: output/glimpse2/input_chr21_16600000-16800000.bcf + - path: output/glimpse2/input_chr21_16650000-16750000.bcf - path: output/glimpse2/versions.yml diff --git
a/tests/modules/nf-core/glimpse2/phase/main.nf b/tests/modules/nf-core/glimpse2/phase/main.nf index bff99e53ba6..36219c4e92e 100644 --- a/tests/modules/nf-core/glimpse2/phase/main.nf +++ b/tests/modules/nf-core/glimpse2/phase/main.nf @@ -4,11 +4,12 @@ nextflow.enable.dsl = 2 include { GLIMPSE2_PHASE } from '../../../../../modules/nf-core/glimpse2/phase/main.nf' - + input_vcf = Channel.of([ [ id:'input' ], // meta map file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true), + [], "chr21:16600000-16800000", "chr21:16650000-16750000" ]) @@ -16,15 +17,17 @@ input_bam = Channel.of([ [id:'input'], file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.bam", checkIfExists: true), file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.bam.bai", checkIfExists: true), + [], "chr21:16600000-16800000", - "chr21:16650000-16750000", + "chr21:16650000-16750000", ]) input_cram = Channel.of([ [id:'input'], file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.cram", checkIfExists: true), file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.cram.crai", checkIfExists: true), + [], "chr21:16600000-16800000", - "chr21:16650000-16750000", + "chr21:16650000-16750000", ]) ref_panel = Channel.of([ file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), @@ -35,10 +38,6 @@ map_file = Channel.of([ file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) ]) -samples_file = Channel.of([ - [] - ]) - reference_genome = Channel.of([ [id:'refHG38_chr21'], 
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/hs38DH.chr21.fa.gz", checkIfExists: true), @@ -48,26 +47,23 @@ reference_genome = Channel.of([ workflow test_glimpse2_phase_vcf { GLIMPSE2_PHASE ( input_vcf.combine(ref_panel) - .combine(map_file) - .combine(samples_file), + .combine(map_file), Channel.of([[],[],[]]) - ) // [meta, vcf, index, regionin, regionout, regionindex, sample_infos], map, sample, [meta, ref, index] + ) // [meta, vcf, index, sample_infos, regionin, regionout,ref, index, map] [meta, fasta, fai] } workflow test_glimpse2_phase_bam { GLIMPSE2_PHASE ( input_bam.combine(ref_panel) - .combine(map_file) - .combine(samples_file), + .combine(map_file), Channel.of([[],[],[]]) - ) // [meta, vcf, index, regionin, regionout, regionindex, sample_infos], map, sample, [meta, ref, index] + ) // [meta, vcf, index, sample_infos, regionin, regionout,ref, index, map] [meta, fasta, fai] } workflow test_glimpse2_phase_cram { GLIMPSE2_PHASE ( input_cram.combine(ref_panel) - .combine(map_file) - .combine(samples_file), + .combine(map_file), reference_genome - ) // [meta, vcf, index, regionin, regionout, regionindex, sample_infos], map, sample, [meta, ref, index] -} \ No newline at end of file + ) // [meta, vcf, index, sample_infos, regionin, regionout,ref, index, map] [meta, fasta, fai] +} diff --git a/tests/modules/nf-core/glimpse2/phase/test.yml b/tests/modules/nf-core/glimpse2/phase/test.yml index 17d8e3e8ad0..d23a02903db 100644 --- a/tests/modules/nf-core/glimpse2/phase/test.yml +++ b/tests/modules/nf-core/glimpse2/phase/test.yml @@ -4,7 +4,7 @@ - glimpse2 - glimpse2/phase files: - - path: output/glimpse2/input_chr21_16600000-16800000.bcf + - path: output/glimpse2/input_chr21_16650000-16750000.bcf - path: output/glimpse2/versions.yml - name: glimpse2 phase test_glimpse2_phase_bam @@ -13,8 +13,8 @@ - glimpse2 - glimpse2/phase files: - - path: output/glimpse2/input_chr21_16600000-16800000.bcf - - path: 
output/glimpse2/input_chr21_16600000-16800000_stats_coverage.txt.gz + - path: output/glimpse2/input_chr21_16650000-16750000.bcf + - path: output/glimpse2/input_chr21_16650000-16750000_stats_coverage.txt.gz md5sum: 632f4bf08bed0870192933a0a32b95c8 - path: output/glimpse2/versions.yml @@ -24,7 +24,7 @@ - glimpse2 - glimpse2/phase files: - - path: output/glimpse2/input_chr21_16600000-16800000.bcf - - path: output/glimpse2/input_chr21_16600000-16800000_stats_coverage.txt.gz + - path: output/glimpse2/input_chr21_16650000-16750000.bcf + - path: output/glimpse2/input_chr21_16650000-16750000_stats_coverage.txt.gz md5sum: a2d58d6fcd1918f649a4eb19d0ee68c3 - path: output/glimpse2/versions.yml diff --git a/tests/subworkflows/nf-core/multiple_impute_glimpse2/main.nf b/tests/subworkflows/nf-core/multiple_impute_glimpse2/main.nf new file mode 100644 index 00000000000..df0c9fb0a00 --- /dev/null +++ b/tests/subworkflows/nf-core/multiple_impute_glimpse2/main.nf @@ -0,0 +1,73 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { MULTIPLE_IMPUTE_GLIMPSE2 } from '../../../../subworkflows/nf-core/multiple_impute_glimpse2/main.nf' + + ch_input_vcf = Channel.of([ + [ id:'input_vcf'], // meta map + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", + checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", + checkIfExists: true), + ]) + sample = Channel.of('NA12878 2') + .collectFile(name: 'sampleinfos.txt') + ch_input_vcf_with_sample = ch_input_vcf.combine(sample) + ch_input_vcf_without_sample = ch_input_vcf.combine(Channel.of([[]])) + + ch_ref_panel = Channel.of([ + [ id:'ref_panel'], + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", + checkIfExists: true), + 
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", + checkIfExists: true), + "chr21" + ]) + ch_map = Channel.of([ + [ id:'map'], + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) + ]).collect() + + ch_fasta_empty = Channel.of([ + [ id:'ref_fasta'], + [], + [] + ]).collect() + +workflow test_multiple_impute_glimpse2_without_sample { + MULTIPLE_IMPUTE_GLIMPSE2 ( ch_input_vcf_without_sample, + ch_ref_panel, ch_map, ch_fasta_empty, "recursive" ) +} + +workflow test_multiple_impute_glimpse2_with_sample { + MULTIPLE_IMPUTE_GLIMPSE2 ( ch_input_vcf_with_sample, + ch_ref_panel, ch_map, ch_fasta_empty, "recursive" ) +} + +workflow test_multiple_impute_glimpse2_bam { + + input_bam = Channel.of([ + [id:'input'], + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.bam", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.bam.bai", checkIfExists: true), + [] + ]) + + MULTIPLE_IMPUTE_GLIMPSE2 ( input_bam, ch_ref_panel, ch_map, ch_fasta_empty, "recursive" ) +} + +workflow test_multiple_impute_glimpse2_cram { + input_cram = Channel.of([ + [id:'input'], + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.cram", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.cram.crai", checkIfExists: true), + [] + ]) + ch_fasta = Channel.of([ + [id:'refHG38_chr21'], + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/hs38DH.chr21.fa.gz", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/hs38DH.chr21.fa.gz.fai", checkIfExists: true) + ]).collect() + MULTIPLE_IMPUTE_GLIMPSE2 ( input_cram, ch_ref_panel, ch_map, ch_fasta, 
"recursive" ) +} diff --git a/tests/subworkflows/nf-core/multiple_impute_glimpse2/nextflow.config b/tests/subworkflows/nf-core/multiple_impute_glimpse2/nextflow.config new file mode 100644 index 00000000000..15fff1d62f5 --- /dev/null +++ b/tests/subworkflows/nf-core/multiple_impute_glimpse2/nextflow.config @@ -0,0 +1,13 @@ +process { + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GLIMPSE2_CHUNK { + ext.args = [ + "--window-cm 0.05", + "--buffer-cm 0.02", + "--window-mb 0.05", + "--buffer-mb 0.02", + "--window-count 50", + "--buffer-count 25" + ].join(' ') + } +} diff --git a/tests/subworkflows/nf-core/multiple_impute_glimpse2/test.yml b/tests/subworkflows/nf-core/multiple_impute_glimpse2/test.yml new file mode 100644 index 00000000000..ead3a4648a9 --- /dev/null +++ b/tests/subworkflows/nf-core/multiple_impute_glimpse2/test.yml @@ -0,0 +1,103 @@ +- name: multiple_impute_glimpse2 test_multiple_impute_glimpse2_without_sample + command: nextflow run ./tests/subworkflows/nf-core/multiple_impute_glimpse2 -entry test_multiple_impute_glimpse2_without_sample -c ./tests/config/nextflow.config + tags: + - bcftools + - bcftools/index + - glimpse2 + - glimpse2/chunk + - glimpse2/ligate + - glimpse2/phase + - glimpse2/splitreference + - subworkflows + - subworkflows/multiple_impute_glimpse2 + files: + - path: output/glimpse2/input_vcf.vcf.gz + - path: output/glimpse2/input_vcf_ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin.bcf + - path: output/glimpse2/input_vcf_ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin.bcf + - path: output/glimpse2/ref_panel.txt + md5sum: 129913c63dbe586de143b8062bd1119d + - path: output/glimpse2/ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin + - path: output/glimpse2/ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin + - path: output/index/input_vcf.vcf.gz.csi + - path: 
output/index/input_vcf_ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin.bcf.csi + - path: output/index/input_vcf_ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin.bcf.csi + +- name: multiple_impute_glimpse2 test_multiple_impute_glimpse2_with_sample + command: nextflow run ./tests/subworkflows/nf-core/multiple_impute_glimpse2 -entry test_multiple_impute_glimpse2_with_sample -c ./tests/config/nextflow.config + tags: + - bcftools + - bcftools/index + - glimpse2 + - glimpse2/chunk + - glimpse2/ligate + - glimpse2/phase + - glimpse2/splitreference + - subworkflows + - subworkflows/multiple_impute_glimpse2 + files: + - path: output/glimpse2/input_vcf.vcf.gz + - path: output/glimpse2/input_vcf_ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin.bcf + - path: output/glimpse2/input_vcf_ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin.bcf + - path: output/glimpse2/ref_panel.txt + md5sum: 129913c63dbe586de143b8062bd1119d + - path: output/glimpse2/ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin + - path: output/glimpse2/ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin + - path: output/index/input_vcf.vcf.gz.csi + - path: output/index/input_vcf_ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin.bcf.csi + - path: output/index/input_vcf_ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin.bcf.csi + +- name: multiple_impute_glimpse2 test_multiple_impute_glimpse2_bam + command: nextflow run ./tests/subworkflows/nf-core/multiple_impute_glimpse2 -entry test_multiple_impute_glimpse2_bam -c ./tests/config/nextflow.config + tags: + - bcftools + - bcftools/index + - glimpse2 + - glimpse2/chunk + - glimpse2/ligate + - glimpse2/phase + - glimpse2/splitreference + - subworkflows + - subworkflows/multiple_impute_glimpse2 + files: + - path: output/glimpse2/input.vcf.gz + - path: output/glimpse2/input_ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin.bcf + - path: 
output/glimpse2/input_ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin_stats_coverage.txt.gz + md5sum: 632f4bf08bed0870192933a0a32b95c8 + - path: output/glimpse2/input_ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin.bcf + - path: output/glimpse2/input_ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin_stats_coverage.txt.gz + md5sum: 632f4bf08bed0870192933a0a32b95c8 + - path: output/glimpse2/ref_panel.txt + md5sum: 129913c63dbe586de143b8062bd1119d + - path: output/glimpse2/ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin + - path: output/glimpse2/ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin + - path: output/index/input.vcf.gz.csi + - path: output/index/input_ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin.bcf.csi + - path: output/index/input_ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin.bcf.csi + +- name: multiple_impute_glimpse2 test_multiple_impute_glimpse2_cram + command: nextflow run ./tests/subworkflows/nf-core/multiple_impute_glimpse2 -entry test_multiple_impute_glimpse2_cram -c ./tests/config/nextflow.config + tags: + - bcftools + - bcftools/index + - glimpse2 + - glimpse2/chunk + - glimpse2/ligate + - glimpse2/phase + - glimpse2/splitreference + - subworkflows + - subworkflows/multiple_impute_glimpse2 + files: + - path: output/glimpse2/input.vcf.gz + - path: output/glimpse2/input_ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin.bcf + - path: output/glimpse2/input_ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin_stats_coverage.txt.gz + md5sum: a2d58d6fcd1918f649a4eb19d0ee68c3 + - path: output/glimpse2/input_ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin.bcf + - path: output/glimpse2/input_ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin_stats_coverage.txt.gz + md5sum: a2d58d6fcd1918f649a4eb19d0ee68c3 + - path: output/glimpse2/ref_panel.txt + md5sum: 129913c63dbe586de143b8062bd1119d + - path: 
output/glimpse2/ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin + - path: output/glimpse2/ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin + - path: output/index/input.vcf.gz.csi + - path: output/index/input_ref_panel_chr21_16600115-16696345_chr21_16600115_16799989.bin.bcf.csi + - path: output/index/input_ref_panel_chr21_16696369-16799989_chr21_16600115_16799989.bin.bcf.csi diff --git a/tests/subworkflows/nf-core/vcf_impute_glimpse/main.nf b/tests/subworkflows/nf-core/vcf_impute_glimpse/main.nf index befb4e4ad45..f6cf80c20d7 100644 --- a/tests/subworkflows/nf-core/vcf_impute_glimpse/main.nf +++ b/tests/subworkflows/nf-core/vcf_impute_glimpse/main.nf @@ -4,35 +4,35 @@ nextflow.enable.dsl = 2 include { VCF_IMPUTE_GLIMPSE } from '../../../../subworkflows/nf-core/vcf_impute_glimpse/main.nf' - input_vcf = Channel.of([ - [ id:'input1'], // meta map - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", - checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", - checkIfExists: true), - "chr21" + samples_infos = Channel.of('NA12878 2').collectFile(name: 'sampleinfos.txt') + empty_channel = Channel.of([[]]) + region = Channel.of(["chr21"]) + input_vcf = Channel.of([ + [ id:'input'], // meta map + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/NA12878.chr21.s.1x.vcf.gz.csi", checkIfExists: true) ]) - ref_panel = Channel.of([ - [ id:'input1'], + + input_vcf_with_samples_infos = input_vcf.combine(samples_infos).combine(region) + input_vcf_without_samples_infos = input_vcf.combine(empty_channel).combine(region) + + ch_ref_panel = Channel.of([ + [ id:'ref_panel'], 
file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf", checkIfExists: true), file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/1000GP.chr21.noNA12878.s.bcf.csi", checkIfExists: true) ]) + ch_map = Channel.of([ - [ id:'input1'], + [ id:'map'], file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/glimpse/chr21.b38.gmap.gz", checkIfExists: true) ]) workflow test_vcf_impute_glimpse_without_sample { - sample = Channel.of([[ id:'input1'],[]]) - VCF_IMPUTE_GLIMPSE ( input_vcf, ref_panel, ch_map, sample ) + VCF_IMPUTE_GLIMPSE ( input_vcf_without_samples_infos, ch_ref_panel, ch_map ) } workflow test_vcf_impute_glimpse_with_sample { - sample = Channel.of([[ id:'input1']]) - .combine(Channel.of('NA12878 2') - .collectFile(name: 'sampleinfos.txt') - ) - VCF_IMPUTE_GLIMPSE ( input_vcf, ref_panel, ch_map, sample ) + VCF_IMPUTE_GLIMPSE ( input_vcf_with_samples_infos, ch_ref_panel, ch_map ) } diff --git a/tests/subworkflows/nf-core/vcf_impute_glimpse/test.yml b/tests/subworkflows/nf-core/vcf_impute_glimpse/test.yml index f950fe3df1f..0dd90f6b903 100644 --- a/tests/subworkflows/nf-core/vcf_impute_glimpse/test.yml +++ b/tests/subworkflows/nf-core/vcf_impute_glimpse/test.yml @@ -8,10 +8,10 @@ - subworkflows - subworkflows/vcf_impute_glimpse files: - - path: output/glimpse/input1.txt + - path: output/glimpse/input.txt md5sum: 75bff56f26d8a590c429afee74df5110 - - path: output/glimpse/input1.vcf.gz - - path: output/glimpse/input1_chr21_16600115-16799989.vcf.gz + - path: output/glimpse/input.vcf.gz + - path: output/glimpse/input_chr21_16600115-16799989.vcf.gz - name: vcf_impute_glimpse test_vcf_impute_glimpse_with_sample command: nextflow run ./tests/subworkflows/nf-core/vcf_impute_glimpse -entry test_vcf_impute_glimpse_with_sample -c ./tests/config/nextflow.config @@ -23,7 +23,7 @@ - subworkflows - subworkflows/vcf_impute_glimpse files: - - path: output/glimpse/input1.txt + - 
path: output/glimpse/input.txt md5sum: 75bff56f26d8a590c429afee74df5110 - - path: output/glimpse/input1.vcf.gz - - path: output/glimpse/input1_chr21_16600115-16799989.vcf.gz + - path: output/glimpse/input.vcf.gz + - path: output/glimpse/input_chr21_16600115-16799989.vcf.gz