diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 7dc12d70..517afdfd 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -15,9 +15,6 @@ jobs: steps: - name: Launch workflow via tower uses: nf-core/tower-action@v3 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} diff --git a/.gitignore b/.gitignore index 7fbf2d9d..8dd14f90 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,4 @@ testing/ testing* *.pyc *.fasta -*.fai \ No newline at end of file +*.fai diff --git a/.nf-core.yml b/.nf-core.yml index a76e0ec8..625ce93d 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,3 +1,8 @@ repository_type: pipeline lint: + files_exist: + - conf/igenomes.config + files_unchanged: + - .github/workflows/branch.yml + - lib/NfcoreSchema.groovy template_strings: False diff --git a/CHANGELOG.md b/CHANGELOG.md index 5808de8d..34b674be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,13 @@ Initial release of nf-core/bamtofastq, created with the [nf-core](https://nf-co. ### `Added` +- [#45](https://github.com/nf-core/bamtofastq/pull/45) Add `test.yml` files with md5sums +- [#44](https://github.com/nf-core/bamtofastq/pull/44) DSL2 conversion + ### `Fixed` +- [#45](https://github.com/nf-core/bamtofastq/pull/45) Minor bugfix with chromosome extraction + ### `Dependencies` ### `Deprecated` diff --git a/CITATIONS.md b/CITATIONS.md index 4add6172..c5b77688 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,11 +12,15 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. 
Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. -- [SAMtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) +- [SAMtools](https://doi.org/10.1093/gigascience/giab008) + + > Danecek, P., Bonfield, J. K., Liddle, J., Marshall, J., Ohan, V., Pollard, M. O., Whitwham, A., Keane, T., McCarthy, S. A., Davies, R. D., Li, H., (2021) Twelve years of SAMtools and BCFtools, GigaScience, Volume 10, Issue 2, giab008. doi: 10.1093/gigascience/giab008 > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. @@ -36,5 +40,7 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/README.md b/README.md index 889e1419..6fb48ddc 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ![nf-core/bamtofastq](docs/images/nf-core-bamtofastq_logo_light.png#gh-light-mode-only) ![nf-core/bamtofastq](docs/images/nf-core-bamtofastq_logo_dark.png#gh-dark-mode-only) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/bamtofastq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/bamtofastq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.4022138-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.4022138) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) @@ -16,8 +16,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! - - On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/bamtofastq/results). ## Pipeline summary @@ -31,6 +29,10 @@ By default, the pipeline currently performs the following steps: 5. QC of converted fastq reads ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)). 6. Summarize QC and statistics before and after format conversion ([`MultiQC`](http://multiqc.info/)). +

+ +

+ ## Quick Start 1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`) diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 8e9e4f83..377392f3 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,8 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/bamtofastq Methods Description" section_href: "https://github.com/nf-core/bamtofastq" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

Data was processed using nf-core/bamtofastq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).

@@ -13,7 +11,16 @@ data: |

References

Notes:
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml deleted file mode 100644 index 7f0809c4..00000000 --- a/assets/multiqc_config.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# custom_logo: ../../../docs/images/nf-core_sarek_logo.png -custom_logo_url: https://github.com/qbic-pipelines/bamtofastq/ -custom_logo_title: "qbic-pipelines/bamtofastq" - -report_comment: > - This report has been generated by the qbic-pipelines/bamtofastq - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - qbic-pipelines/bamtofastq-software-versions: - order: -1000 - qbic-pipelines-bamtofastq-summary: - order: -1100 - -top_modules: - - "fastqc": - name: "FastQC (Input Bam)" - path_filters_exclude: - - "*singleton_fastqc*" - - "*.1_fastqc*" - - "*.2_fastqc*" - - "samtools": - name: "Samtools (Input Bam)" - - "fastqc": - name: "FastQC (Output Reads)" - path_filters: - - "*singleton_fastqc*" - - "*.1_fastqc*" - - "*.2_fastqc*" - -export_plots: true diff --git a/assets/qbic-pipelines-bamtofastq_logo.png b/assets/qbic-pipelines-bamtofastq_logo.png deleted file mode 100644 index 15d18be4..00000000 Binary files a/assets/qbic-pipelines-bamtofastq_logo.png and /dev/null differ diff --git a/assets/qbic-pipelines-bamtofastq_logo.svg b/assets/qbic-pipelines-bamtofastq_logo.svg deleted file mode 100644 index 64a459e9..00000000 --- a/assets/qbic-pipelines-bamtofastq_logo.svg +++ /dev/null @@ -1,391 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - bamtofastq - - - qbic-pipelines/ - - - An open-source pipeline converting (un)mapped single-end or paired-end bam files to fastq.gz - - diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index dc41f6e9..0e3a0add 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,4 +1,3 @@ 
sample_id,mapped,index,file_type -test1,https:/raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/First_SmallTest_Paired.bam,https:/raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/First_SmallTest_Paired.bam.bai,bam -test2,https:/raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/Second_SmallTest_Paired.bam,https:/raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/Second_SmallTest_Paired.bam.bai,bam -test3,https:/raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam,https:/raw.githubusercontent.com/qbic-pipelines/bamtofastq/master/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam.bai,bam +test,https://github.com/nf-core/test-datasets/raw/bamtofastq/test-datasets/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://github.com/nf-core/test-datasets/raw/bamtofastq/test-datasets/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai,bam +test2,https://github.com/nf-core/test-datasets/raw/bamtofastq/test-datasets/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam,https://github.com/nf-core/test-datasets/raw/bamtofastq/test-datasets/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai,bam diff --git a/conf/awsbatch.config b/conf/awsbatch.config deleted file mode 100644 index 1b451f13..00000000 --- a/conf/awsbatch.config +++ /dev/null @@ -1,18 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running on AWS batch - * ------------------------------------------------- - * Base config needed for running with -profile awsbatch - */ -params { - config_profile_name = 'AWSBATCH' - config_profile_description = 'AWSBATCH Cloud Profile' - config_profile_contact = 'Alexander Peltzer (@apeltzer)' - config_profile_url = 'https://aws.amazon.com/de/batch/' -} - -aws.region = params.awsregion -process.executor = 'awsbatch' -process.queue = params.awsqueue 
-executor.awscli = '/home/ec2-user/miniconda/bin/aws' -params.tracedir = './' diff --git a/conf/base.config b/conf/base.config index 7ad48ae2..de66f608 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,54 +10,54 @@ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 7.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' - // Process-specific resource requirements - // NOTE - Please try and re-use the labels below as much as possible. - // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. - // If possible, it would be nice to keep the same label naming convention when - // adding in your local modules too. - // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors - withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - withLabel:process_low { - cpus = { check_max( 7 * task.attempt, 'cpus' ) } - memory = { check_max( 15.GB * task.attempt, 'memory' ) } - time = { check_max( 6.h * task.attempt, 'time' ) } - } - withLabel:process_medium { - cpus = { check_max( 15 * task.attempt, 'cpus' ) } - memory = { check_max( 31.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } - } - withLabel:process_high { - cpus = { check_max( 15 * task.attempt, 'cpus' ) } - memory = { check_max( 200.GB * task.attempt, 'memory' ) } - time = { check_max( 10.h * task.attempt, 'time' ) } - } - withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } - } - withLabel:process_high_memory { - memory = { check_max( 
200.GB * task.attempt, 'memory' ) } - } - withLabel:error_ignore { - errorStrategy = 'ignore' - } - withLabel:error_retry { - errorStrategy = 'retry' - maxRetries = 2 - } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_low { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { + cache = false + } } diff --git a/conf/igenomes.config b/conf/igenomes.config deleted file mode 100644 index f1d53d33..00000000 --- a/conf/igenomes.config +++ /dev/null @@ -1,432 +0,0 @@ -/* 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for iGenomes paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines reference genomes using iGenome paths. - Can be used by any config that customises the base path using: - $params.igenomes_base / --igenomes_base ----------------------------------------------------------------------------------------- -*/ - -params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" - } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - mito_name = "Mt" - } - 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" - } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - mito_name = "MT" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" - gtf = 
"${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mito_name = "MtDNA" - macs_gsize = "9e7" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - mito_name = "MT" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mito_name = "M" - macs_gsize = "1.2e8" - } - 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" - mito_name = "MT" - } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" - } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bismark = 
"${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - mito_name = "MT" - } - 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - mito_name = "MT" - } - 'Rnor_5.0' { - fasta = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" - gtf = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - mito_name = "MT" - macs_gsize = "1.2e7" - } - 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.21e7" - } - 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" - } - 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" - mito_name = "MT" - } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = 
"${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" - } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" - } - 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "9e7" - } - 'canFam3' { - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - mito_name = "chrM" - } - 'danRer10' { - fasta = 
"${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.37e9" - } - 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.2e8" - } - 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - mito_name = "chrM" - } - 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - mito_name = "chrM" - } - 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - mito_name = "chrM" - } - 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = 
"${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.2e7" - } - 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - mito_name = "chrM" - } - } -} diff --git a/conf/modules.config b/conf/modules.config index 23bb7e7a..0aa502e0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -40,10 
+40,11 @@ process { withName: 'SAMTOOLS_COLLATEFASTQ_SINGLE_END' { ext.args = { params.samtools_collate_fast ? "-f -r " + params.reads_in_memory : "" } + ext.args2 = '-N' publishDir = [ path: { "${params.outdir}/reads" }, mode: params.publish_dir_mode, - pattern: '*singleton.fq.gz' + pattern: '*{other.fq.gz}' ] } diff --git a/conf/test.config b/conf/test.config index 5b60d317..e36d6072 100644 --- a/conf/test.config +++ b/conf/test.config @@ -11,14 +11,14 @@ */ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '48.h' + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '48.h' - // Input data - input ="${projectDir}/testdata/test_bam_samplesheet.csv" + // Input data + input = "https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_bam_samplesheet.csv" } diff --git a/conf/test_chr.config b/conf/test_chr.config index 3c4c04a6..d91e33b2 100644 --- a/conf/test_chr.config +++ b/conf/test_chr.config @@ -4,11 +4,20 @@ * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test. 
Use as follows: - * nextflow run qbic-pipelines/bamtofastq -profile test + * nextflow run nf-core/bamtofastq -profile test,test_chr, --outdir */ -includeConfig 'test.config' params { - chr = 'chrX chrY X Y' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '48.h' + + // Input data + input = "https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_chr_samplesheet.csv" + chr = 'chrX chrY X Y' } diff --git a/conf/test_collate_fast.config b/conf/test_collate_fast.config new file mode 100644 index 00000000..d779bb28 --- /dev/null +++ b/conf/test_collate_fast.config @@ -0,0 +1,25 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/bamtofastq -profile test,test_collate_fast,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '48.h' + + // Input data + input = "https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_bam_samplesheet.csv" + samtools_collate_fast = true +} diff --git a/conf/test_cram.config b/conf/test_cram.config index 7f3fc277..7e9fe34a 100644 --- a/conf/test_cram.config +++ b/conf/test_cram.config @@ -4,19 +4,20 @@ * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test. Use as follows: - * nextflow run qbic-pipelines/bamtofastq -profile test_cram + * nextflow run nf-core/bamtofastq -profile test,test_cram,<docker/singularity> --outdir <OUTDIR> */ params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on Travis - max_cpus = 2 - max_memory = 6.GB - max_time = 48.h - input ="${projectDir}/testdata/test_cram_samplesheet.csv" + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '48.h' - fasta = 'ftp://ftp.broadinstitute.org/pub/seq/references/Homo_sapiens_assembly19.fasta' + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_cram_samplesheet.csv' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' } diff --git a/conf/test_full.config b/conf/test_full.config index 26df7e6f..30136260 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -13,14 +13,10 @@ cleanup = true params { - 
config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' - // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' - - // Genome references - genome = 'R64-1-1' + // Input data for full size test + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_full_samplesheet.csv' + fasta = 's3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta' } diff --git a/conf/test_no_bai.config b/conf/test_no_bai.config index 8511ae85..d5befca8 100644 --- a/conf/test_no_bai.config +++ b/conf/test_no_bai.config @@ -5,20 +5,22 @@ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/bamtofastq -profile test, --outdir + nextflow run nf-core/bamtofastq -profile test,test_no_bai, --outdir ---------------------------------------------------------------------------------------- */ + params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '48.h' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '48.h' - // Input data - input ="${projectDir}/testdata/test_bam_no_index.csv" + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_bam_samplesheet_no_bai.csv' } diff --git a/conf/test_no_crai.config b/conf/test_no_crai.config index f0d7597c..aee1bd67 100644 --- a/conf/test_no_crai.config +++ b/conf/test_no_crai.config @@ -5,22 +5,24 @@ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/bamtofastq -profile test, --outdir + nextflow run nf-core/bamtofastq -profile test,test_no_crai, --outdir ---------------------------------------------------------------------------------------- */ + + params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test no crai profile' + config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '48.h' + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '48.h' - // Input data - input ="${projectDir}/testdata/test_cram_no_crai.csv" - fasta = 'ftp://ftp.broadinstitute.org/pub/seq/references/Homo_sapiens_assembly19.fasta' + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_cram_samplesheet_no_crai.csv' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' } diff --git a/conf/test_no_qc.config b/conf/test_no_qc.config new file mode 100644 index 00000000..9461bb5b --- /dev/null +++ b/conf/test_no_qc.config @@ -0,0 +1,25 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/bamtofastq -profile test,test_no_qc,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '48.h' + + // Input data + input = "https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_bam_samplesheet.csv" + no_read_qc = true +} diff --git a/conf/test_no_stats.config b/conf/test_no_stats.config new file mode 100644 index 00000000..24341b82 --- /dev/null +++ b/conf/test_no_stats.config @@ -0,0 +1,25 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/bamtofastq -profile test,test_no_stats,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '48.h' + + // Input data + input = "https://raw.githubusercontent.com/nf-core/test-datasets/bamtofastq/samplesheet/test_bam_samplesheet.csv" + no_stats = true +} diff --git a/docs/images/nf-core-bamtofastq-subway.png b/docs/images/nf-core-bamtofastq-subway.png new file mode 100644 index 00000000..630c8ea1 Binary files /dev/null and b/docs/images/nf-core-bamtofastq-subway.png differ diff --git a/docs/images/nf-core-bamtofastq-subway.svg b/docs/images/nf-core-bamtofastq-subway.svg new file mode 100644 index 00000000..fc017716 --- /dev/null +++ b/docs/images/nf-core-bamtofastq-subway.svg @@ -0,0 +1,1480 @@ + + + +bambambambaibamfastabamfaibamcrambamcraiindexpaired-endsingle-endoptionalsamtoolsPre-conversion QCAlignment to FastQPre-processingfaidxfastqcfastqcmultiQCflagstatidxstatsstatsextract chromosomecheck if paired-endviewcollate fastqcat fastqhtmlfastq diff --git a/docs/images/qbic-pipelines-bamtofastq_logo.png b/docs/images/qbic-pipelines-bamtofastq_logo.png deleted file mode 100644 index fcd011a7..00000000 Binary files a/docs/images/qbic-pipelines-bamtofastq_logo.png and /dev/null differ diff --git a/docs/images/qbic-pipelines-bamtofastq_logo.svg b/docs/images/qbic-pipelines-bamtofastq_logo.svg deleted file mode 100644 index 866974a8..00000000 --- a/docs/images/qbic-pipelines-bamtofastq_logo.svg +++ /dev/null @@ -1,489 +0,0 @@ - - - - - - - - image/svg+xml - - - - - - - - - diff --git a/docs/output.md b/docs/output.md index 83e41dea..137c944e 100644 --- a/docs/output.md +++ b/docs/output.md @@ -50,7 +50,16 @@ The pipeline is built using 
[Nextflow](https://www.nextflow.io/) and processes d [Samtools](https://www.htslib.org) is used to extract reads from the bam files and to compute some BAM/CRAM statistics. -The converted and gzipped fastq output reads are written to the directory `results/reads/*.f(ast)?q.gz`. +The converted and gzipped fastq output reads are written to the directory `results/reads/`. + +<details markdown="1">
+<summary>Read files</summary> + +- `reads/` + - `*.merged.fastq.gz`: Paired-end read files + - `*.other.fq.gz` : Single-end read files + +</details>
### MultiQC diff --git a/docs/usage.md b/docs/usage.md index 2dc05eda..8a857435 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -18,20 +18,20 @@ You will need to create a samplesheet with information about the samples you wou ### Full samplesheet -The pipeline will auto-detect whether a sample is single- or paired-end. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +The pipeline will auto-detect whether a sample is single- or paired-end. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. If the index files are not available, the files will be automatically indexed during the pipeline run which can have an effect on the runtime. ```console sample_id,mapped,index,file_type -test1,First_SmallTest_Paired.cram,First_SmallTest_Paired.cram.crai,cram -test2,Second_SmallTest_Paired.cram,Second_SmallTest_Paired.cram.crai,cram +test1,test1.cram,test1.cram.crai,cram +test2,test2.cram,test2.cram.crai,cram ``` -| Column | Description | -| ---------- | ----------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. | -| `mapped` | Full path to input BAM/CRAM file. File has to have the extension ".bam" or ".cram". | -| `index` | If available provide full path to input BAI/CRAI index file. File has to have the extension ".bam.bai" or ".cram.crai". | -| `filetype` | For input BAM files the filetype hast to be "bam" and for input CRAM files, the filetype needs to be "cram". | +| Column | Description | +| ---------- | ----------------------------------------------------------------------------------------------------------------- | +| `sample_id` | Custom sample name. | +| `mapped` | Absolute path to input BAM/CRAM file. Allowed file extensions: ".bam" or ".cram". 
| +| `index` | If available, provide full path to input BAI/CRAI index file. File extensions must be ".bam.bai" or ".cram.crai". | +| `file_type` | Type of input file. Options: "bam" or "cram". | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. @@ -70,8 +70,7 @@ with `params.yaml` containing: ```yaml input: './samplesheet.csv' outdir: './results/' -genome: 'GRCh37' -input: 'data' +fasta: './reference.fasta' <...> ``` @@ -142,7 +141,8 @@ Use this to specify the location of your input BAM/CRAM files. For example: ### `--fasta` -Use this option to indicate which reference genome FASTA file to use when decompressing CRAM files. It will only work if the reference genome FASTA file listed in the CRAM header is available (_e.g._ via HTTP/FTP or on the local file system). Otherwise, you will need to use the [`--fasta`](#--fasta) option. You can check which reference FASTA file is indicated in the CRAM header with the following command: +When converting a CRAM file the fasta file specified in the CRAM header should be used to decompress the file. If that file is not available, you will need to specify an alternative path using the [`--fasta`](#--fasta) option. +You can check which reference FASTA file should be used by inspecting the CRAM file with the following command: ```bash samtools view -H path/to/sample.cram | grep '@SQ'. @@ -166,7 +166,12 @@ For example: --chr 'X chrX' ``` -This extracts reads mapping to `X` as well as `chrX` +This extracts reads mapping to `X` as well as `chrX`. +To check beforehand which chromosome notation is used in your bam/cram file you can use samtools. 
+ +```bash +samtools idxstats your_input.[bam|cram] | head -n 25 +``` ### `--no_read_QC` (optional) diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index 33cd4f6e..4d296814 100755 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -2,6 +2,7 @@ // This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. // +import nextflow.Nextflow import org.everit.json.schema.Schema import org.everit.json.schema.loader.SchemaLoader import org.everit.json.schema.ValidationException @@ -177,7 +178,7 @@ class NfcoreSchema { } if (has_error) { - System.exit(1) + Nextflow.error('Exiting!') } } diff --git a/lib/WorkflowBamtofastq.groovy b/lib/WorkflowBamtofastq.groovy index 562bc9c8..0cec579b 100755 --- a/lib/WorkflowBamtofastq.groovy +++ b/lib/WorkflowBamtofastq.groovy @@ -2,6 +2,7 @@ // This file holds several functions specific to the workflow/bamtofastq.nf in the nf-core/bamtofastq pipeline // +import nextflow.Nextflow import groovy.text.SimpleTemplateEngine class WorkflowBamtofastq { @@ -61,12 +62,12 @@ class WorkflowBamtofastq { // private static void genomeExistsError(params, log) { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - System.exit(1) + Nextflow.error(error_string) } } } diff --git a/main.nf b/main.nf index 38ca6d9f..febc1463 100644 --- a/main.nf +++ b/main.nf @@ -18,7 +18,6 @@ nextflow.enable.dsl = 2 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.chr = WorkflowMain.getGenomeAttribute(params, 'chr') params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') params.fasta_fai = WorkflowMain.getGenomeAttribute(params, 'fasta_fai') diff --git a/modules.json b/modules.json index 1c2e7e64..c1213160 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", + "git_sha": "7101db4432d3268b7fcb5b8f75fa0a022dc5561b", "installed_by": ["modules"] }, "fastqc": { @@ -27,42 +27,42 @@ }, "samtools/collatefastq": { "branch": "master", - "git_sha": "ad1b48cff38a49d62ed8e91d921790c03b0a9c92", + "git_sha": "371eff7748d769c2ddc8bd593773523a364a52fe", "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "371eff7748d769c2ddc8bd593773523a364a52fe", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "371eff7748d769c2ddc8bd593773523a364a52fe", "installed_by": ["modules"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "371eff7748d769c2ddc8bd593773523a364a52fe", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "371eff7748d769c2ddc8bd593773523a364a52fe", "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "371eff7748d769c2ddc8bd593773523a364a52fe", "installed_by": ["modules"] }, "samtools/stats": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "371eff7748d769c2ddc8bd593773523a364a52fe", "installed_by": ["modules"] }, "samtools/view": { "branch": 
"master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "371eff7748d769c2ddc8bd593773523a364a52fe", "installed_by": ["modules"] } } diff --git a/modules/local/check_paired_end.nf b/modules/local/check_paired_end.nf index 24336dbc..da5fb289 100644 --- a/modules/local/check_paired_end.nf +++ b/modules/local/check_paired_end.nf @@ -2,10 +2,10 @@ process CHECK_IF_PAIRED_END { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input), path(index) @@ -24,7 +24,7 @@ process CHECK_IF_PAIRED_END { def prefix = task.ext.prefix ?: "${meta.id}" def reference = meta.filetype == "cram" ? 
"--reference ${fasta}" : "" """ - if [ \$({ samtools view -H $reference $input -@$task.cpus ; samtools view $reference $input -@$task.cpus | head -n1000; } | samtools view $reference $reference -c -f 1 -@$task.cpus | awk '{print \$1/1000}') = "1" ]; then + if [ \$({ samtools view -H $reference $input -@$task.cpus ; samtools view $reference $input -@$task.cpus | head -n1000; } | samtools view $reference -c -f 1 -@$task.cpus | awk '{print \$1/1000}') = "1" ]; then echo 1 > ${prefix}.paired.txt else echo 0 > ${prefix}.single.txt diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py old mode 100755 new mode 100644 diff --git a/modules/nf-core/samtools/collatefastq/main.nf b/modules/nf-core/samtools/collatefastq/main.nf index 857f60c6..b8f15644 100644 --- a/modules/nf-core/samtools/collatefastq/main.nf +++ b/modules/nf-core/samtools/collatefastq/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_COLLATEFASTQ { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index ce6580d2..21be8bad 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_FAIDX { tag "$fasta" label 'process_single' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index 2120cd7d..4b3070fc 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" label 'process_single' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf index a7b87d8b..5eefc058 100644 --- a/modules/nf-core/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" label 'process_single' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 8b95687a..19d25cae 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf index a80ff3a2..ebd64221 100644 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_MERGE { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input_files, stageAs: "?/*") diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index 0a2a3640..8dbcc53b 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_STATS { tag "$meta.id" label 'process_single' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input), path(input_index) diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index 729c85e5..d7b2a0d3 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.16.1" + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input), path(index) diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 2e597d34..76916033 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -27,8 +27,8 @@ input: pattern: "*.{bam,cram,sam}" - index: type: optional file - description: BAM.BAI/CRAM.CRAI file - pattern: "*.{.bai,.crai}" + description: BAM.BAI/BAM.CSI/CRAM.CRAI file + pattern: "*.{.bai,.csi,.crai}" - fasta: type: optional file description: Reference file the CRAM was created with diff --git a/nextflow.config b/nextflow.config index 50b99ad3..be42ac34 100644 --- a/nextflow.config +++ b/nextflow.config @@ -155,6 +155,8 @@ profiles { test_chr { includeConfig 'conf/test_chr.config' } test_cram { includeConfig 'conf/test_cram.config' } test_no_crai { includeConfig 'conf/test_no_crai.config' } + test_no_stats { includeConfig 'conf/test_no_stats.config' } 
+ test_no_qc { includeConfig 'conf/test_no_qc.config' } } diff --git a/subworkflows/local/prepare_indices.nf b/subworkflows/local/prepare_indices.nf index 7d1c033e..86cd7daf 100644 --- a/subworkflows/local/prepare_indices.nf +++ b/subworkflows/local/prepare_indices.nf @@ -12,8 +12,6 @@ include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/fa workflow PREPARE_INDICES { take: - index_provided // boolean determined automatically - //cram_input // boolean determined automatically input // channel: [meta, alignment (BAM or CRAM), []] fasta // optional: reference file if CRAM format and reference not in header @@ -21,16 +19,26 @@ workflow PREPARE_INDICES { ch_versions = Channel.empty() - // INDEX BAM/CRAM if not provided - ch_input = input.map{ it -> [it[0], it[1]] } ch_out = Channel.empty() - if (!index_provided){ - SAMTOOLS_INDEX((input.map{ it -> [it[0], it[1]] })) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) - ch_index_files = Channel.empty().mix(SAMTOOLS_INDEX.out.bai, SAMTOOLS_INDEX.out.crai) - ch_out = ch_input.join(ch_index_files) - - } + + // Determine if INDEX provided + input.branch{ + is_indexed: it[0].index == true + to_index: it[0].index == false + }.set{samtools_input} + + // Remove empty INDEX [] from channel + input_to_index = samtools_input.to_index.map{ it -> [it[0], it[1]] } + + // INDEX BAM/CRAM only if not provided + SAMTOOLS_INDEX(input_to_index) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) + ch_index_files = Channel.empty().mix(SAMTOOLS_INDEX.out.bai, SAMTOOLS_INDEX.out.crai) + + // Combine channels + ch_new = input_to_index.join(ch_index_files) + ch_out = samtools_input.is_indexed.mix(ch_new) + // INDEX FASTA fasta_fai = [] @@ -41,10 +49,9 @@ workflow PREPARE_INDICES { } - // Gather versions of all tools used emit: - ch_input = ch_out - fasta_fai = fasta_fai - versions = ch_versions + ch_input_indexed = ch_out + fasta_fai = fasta_fai + versions = ch_versions } diff --git 
a/testdata/First_SmallTest_Paired.bam b/testdata/First_SmallTest_Paired.bam deleted file mode 100644 index 4d4d3b84..00000000 Binary files a/testdata/First_SmallTest_Paired.bam and /dev/null differ diff --git a/testdata/First_SmallTest_Paired.bam.bai b/testdata/First_SmallTest_Paired.bam.bai deleted file mode 100644 index 5f072d8b..00000000 Binary files a/testdata/First_SmallTest_Paired.bam.bai and /dev/null differ diff --git a/testdata/First_SmallTest_Paired.cram b/testdata/First_SmallTest_Paired.cram deleted file mode 100644 index 07da5489..00000000 Binary files a/testdata/First_SmallTest_Paired.cram and /dev/null differ diff --git a/testdata/First_SmallTest_Paired.cram.crai b/testdata/First_SmallTest_Paired.cram.crai deleted file mode 100644 index 62999df3..00000000 Binary files a/testdata/First_SmallTest_Paired.cram.crai and /dev/null differ diff --git a/testdata/Second_SmallTest_Paired.bam b/testdata/Second_SmallTest_Paired.bam deleted file mode 100644 index 2b4412f6..00000000 Binary files a/testdata/Second_SmallTest_Paired.bam and /dev/null differ diff --git a/testdata/Second_SmallTest_Paired.bam.bai b/testdata/Second_SmallTest_Paired.bam.bai deleted file mode 100644 index 9f11d9ed..00000000 Binary files a/testdata/Second_SmallTest_Paired.bam.bai and /dev/null differ diff --git a/testdata/Second_SmallTest_Paired.cram b/testdata/Second_SmallTest_Paired.cram deleted file mode 100644 index 21846f1b..00000000 Binary files a/testdata/Second_SmallTest_Paired.cram and /dev/null differ diff --git a/testdata/Second_SmallTest_Paired.cram.crai b/testdata/Second_SmallTest_Paired.cram.crai deleted file mode 100644 index 90e449aa..00000000 Binary files a/testdata/Second_SmallTest_Paired.cram.crai and /dev/null differ diff --git a/testdata/local.csv b/testdata/local.csv deleted file mode 100644 index ae446ce9..00000000 --- a/testdata/local.csv +++ /dev/null @@ -1,2 +0,0 @@ - //fasta = '/home-link/afijo01/repos/bamtofastq/testdata/Homo_sapiens_assembly19.fasta' - // 
test_cram_local \ No newline at end of file diff --git a/testdata/test_bam_local.csv b/testdata/test_bam_local.csv deleted file mode 100644 index d890da41..00000000 --- a/testdata/test_bam_local.csv +++ /dev/null @@ -1,4 +0,0 @@ -sample_id,mapped,index,file_type -test1,/home/afijo01/repos/bamtofastq/testdata/First_SmallTest_Paired.bam,/home/afijo01/repos/bamtofastq/testdata/First_SmallTest_Paired.bam.bai,bam -test2,/home/afijo01/repos/bamtofastq/testdata/Second_SmallTest_Paired.bam,/home/afijo01/repos/bamtofastq/testdata/Second_SmallTest_Paired.bam.bai,bam -test3,/home/afijo01/repos/bamtofastq/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam,/home/afijo01/repos/bamtofastq/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam.bai,bam \ No newline at end of file diff --git a/testdata/test_bam_no_index.csv b/testdata/test_bam_no_index.csv deleted file mode 100644 index 06d8a3d7..00000000 --- a/testdata/test_bam_no_index.csv +++ /dev/null @@ -1,4 +0,0 @@ -sample_id,mapped,index,file_type -test1,https://github.com/qbic-pipelines/bamtofastq/raw/master/testdata/First_SmallTest_Paired.bam,,bam -test2,https://github.com/qbic-pipelines/bamtofastq/raw/master/testdata/Second_SmallTest_Paired.bam,,bam -test3,https://github.com/qbic-pipelines/bamtofastq/raw/master/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam,,bam \ No newline at end of file diff --git a/testdata/test_bam_samplesheet.csv b/testdata/test_bam_samplesheet.csv deleted file mode 100644 index c2ea5b97..00000000 --- a/testdata/test_bam_samplesheet.csv +++ /dev/null @@ -1,4 +0,0 @@ -sample_id,mapped,index,file_type -test1,https://github.com/qbic-pipelines/bamtofastq/raw/master/testdata/First_SmallTest_Paired.bam,https://github.com/qbic-pipelines/bamtofastq/raw/master/testdata/First_SmallTest_Paired.bam.bai,bam -test2,https://github.com/qbic-pipelines/bamtofastq/raw/master/testdata/Second_SmallTest_Paired.bam,https://github.com/qbic-pipelines/bamtofastq/raw/master/testdata/Second_SmallTest_Paired.bam.bai,bam 
-test3,https://github.com/qbic-pipelines/bamtofastq/raw/master/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam,https://github.com/qbic-pipelines/bamtofastq/raw/master/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam.bai,bam \ No newline at end of file diff --git a/testdata/test_cram_github_new_api.csv b/testdata/test_cram_github_new_api.csv deleted file mode 100644 index 956b3e11..00000000 --- a/testdata/test_cram_github_new_api.csv +++ /dev/null @@ -1,4 +0,0 @@ -sample_id,mapped,index,file_type -test1,https:/github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/First_SmallTest_Paired.cram,https:/github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/First_SmallTest_Paired.cram.crai,cram -test2,https:/github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/Second_SmallTest_Paired.cram,https:/github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/Second_SmallTest_Paired.cram.crai,cram -test3,https:/github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram,https:/github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram.crai,cram \ No newline at end of file diff --git a/testdata/test_cram_local.csv b/testdata/test_cram_local.csv deleted file mode 100644 index 0a9d501f..00000000 --- a/testdata/test_cram_local.csv +++ /dev/null @@ -1,4 +0,0 @@ -sample_id,mapped,index,file_type -test1,/home/afijo01/repos/bamtofastq/testdata/First_SmallTest_Paired.cram,/home/afijo01/repos/bamtofastq/testdata/First_SmallTest_Paired.cram.crai,cram -test2,/home/afijo01/repos/bamtofastq/testdata/Second_SmallTest_Paired.cram,/home/afijo01/repos/bamtofastq/testdata/Second_SmallTest_Paired.cram.crai,cram -test3,/home/afijo01/repos/bamtofastq/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram,/home/afijo01/repos/bamtofastq/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram.crai,cram \ No newline at end of file diff --git a/testdata/test_cram_no_crai.csv b/testdata/test_cram_no_crai.csv deleted file mode 100644 index 0b2cf329..00000000 --- 
a/testdata/test_cram_no_crai.csv +++ /dev/null @@ -1,4 +0,0 @@ -sample_id,mapped,index,file_type -test1,https://github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/First_SmallTest_Paired.cram,,cram -test2,https://github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/Second_SmallTest_Paired.cram,,cram -test3,https://github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram,,cram \ No newline at end of file diff --git a/testdata/test_cram_samplesheet.csv b/testdata/test_cram_samplesheet.csv deleted file mode 100644 index 284d4240..00000000 --- a/testdata/test_cram_samplesheet.csv +++ /dev/null @@ -1,4 +0,0 @@ -sample_id,mapped,index,file_type -test1,https://github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/First_SmallTest_Paired.cram,https://github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/First_SmallTest_Paired.cram.crai,cram -test2,https://github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/Second_SmallTest_Paired.cram,https://github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/Second_SmallTest_Paired.cram.crai,cram -test3,https://github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram,https://github.com/SusiJo/bamtofastq/raw/dls2_new/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram.crai,cram \ No newline at end of file diff --git a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam b/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam deleted file mode 100644 index 1329d402..00000000 Binary files a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam and /dev/null differ diff --git a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam.bai b/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam.bai deleted file mode 100644 index ab60f0a6..00000000 Binary files a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.bam.bai and /dev/null differ diff --git a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram b/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram deleted file mode 100644 index 3349f6fb..00000000 Binary files a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram 
and /dev/null differ diff --git a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram.crai b/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram.crai deleted file mode 100644 index 0672dca5..00000000 Binary files a/testdata/wgEncodeUwRepliSeqK562G1AlnRep1.cram.crai and /dev/null differ diff --git a/tests/test.yml b/tests/test.yml new file mode 100644 index 00000000..c1b03f8f --- /dev/null +++ b/tests/test.yml @@ -0,0 +1,30 @@ +- name: Run test profile + command: nextflow run main.nf -profile test,docker + tags: + - test + - bam + - default + - paired-end + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: 15ab79e32e45138d29b6278f4c32a8ca + - path: results/reads/test2_1.merged.fastq.gz + md5sum: d0fd4034e5f07590b0944e3cf24cff60 + - path: results/reads/test2_2.merged.fastq.gz + md5sum: bcb8df2cda7006efb30bbf2c8830e761 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 2c2dbdf1a7584efb6861ad1ac5efbb43 + - path: results/samtools/test2.flagstat + md5sum: acbd95030a362a7e6491faa87d1b9c2c + - path: results/samtools/test2.idxstats + md5sum: 90f01313bc89c7fd096aad051cd71b66 + - path: results/samtools/test2.stats + md5sum: f89e29166ff872b38ee72e15d44ae8c3 + - path: results/samtools/test.flagstat + md5sum: a53f3d26e2e9851f7d528442bbfe9781 + - path: results/samtools/test.idxstats + md5sum: e179601fa7b8ebce81ac3765206f6c15 + - path: results/samtools/test.stats + md5sum: f81d34302eec687c43539432e81022d4 + - path: results/fastqc + - path: results/multiqc diff --git a/tests/test_chr.yml b/tests/test_chr.yml new file mode 100644 index 00000000..a5c8ae78 --- /dev/null +++ b/tests/test_chr.yml @@ -0,0 +1,18 @@ +- name: Run test profile + command: nextflow run main.nf -profile test,test_chr,docker + tags: + - test + - chromosomes + files: + - path: results/reads/test3.chrX_chrY_X_Y_other.fq.gz + md5sum: fee527cf707a00e16065616a1fd1cb9e + - path: results/samtools/test3.chrX_chrY_X_Y.bam + md5sum: 751210b16706b6309f41a92a8c44952c + - path: results/samtools/test3.flagstat + 
md5sum: 7b43b975b0a395040bf1ee3c48f058e7 + - path: results/samtools/test3.idxstats + md5sum: 094096b6f303cc18892e1e027c87f273 + - path: results/samtools/test3.stats + md5sum: 5265894a506071133a29ef1ff5fb48b8 + - path: results/fastqc + - path: results/multiqc diff --git a/tests/test_collate_fast.yml b/tests/test_collate_fast.yml new file mode 100644 index 00000000..06753eac --- /dev/null +++ b/tests/test_collate_fast.yml @@ -0,0 +1,27 @@ +command: nextflow run main.nf -profile test,docker --samtools_collate_fast +tags: + - test + - collate_fast +files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: df8cf0d9fc7a7f6b23115592a6ff3261 + - path: results/reads/test2_1.merged.fastq.gz + md5sum: 6d6959d6955cd91f5c59f4b0fa4912bf + - path: results/reads/test2_2.merged.fastq.gz + md5sum: 6aeb5b48ffef5697a4d3c61c488735f8 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 8a4e01c993334bf2f9b40eb8e0ed69fb + - path: results/samtools/test2.flagstat + md5sum: acbd95030a362a7e6491faa87d1b9c2c + - path: results/samtools/test2.idxstats + md5sum: 90f01313bc89c7fd096aad051cd71b66 + - path: results/samtools/test2.stats + md5sum: f89e29166ff872b38ee72e15d44ae8c3 + - path: results/samtools/test.flagstat + md5sum: a53f3d26e2e9851f7d528442bbfe9781 + - path: results/samtools/test.idxstats + md5sum: e179601fa7b8ebce81ac3765206f6c15 + - path: results/samtools/test.stats + md5sum: f81d34302eec687c43539432e81022d4 + - path: results/fastqc + - path: results/multiqc diff --git a/tests/test_cram.yml b/tests/test_cram.yml new file mode 100644 index 00000000..7394639a --- /dev/null +++ b/tests/test_cram.yml @@ -0,0 +1,29 @@ +- name: Run test profile + command: nextflow run main.nf -profile test,test_cram,docker + tags: + - test + - cram + - paired-end + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: 15ab79e32e45138d29b6278f4c32a8ca + - path: results/reads/test2_1.merged.fastq.gz + md5sum: d0fd4034e5f07590b0944e3cf24cff60 + - path: results/reads/test2_2.merged.fastq.gz + 
md5sum: bcb8df2cda7006efb30bbf2c8830e761 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 2c2dbdf1a7584efb6861ad1ac5efbb43 + - path: results/samtools/test2.flagstat + md5sum: acbd95030a362a7e6491faa87d1b9c2c + - path: results/samtools/test2.idxstats + md5sum: 90f01313bc89c7fd096aad051cd71b66 + - path: results/samtools/test2.stats + md5sum: 6e4ad43d69cf225eb4b2bdc11401dd6e + - path: results/samtools/test.flagstat + md5sum: a53f3d26e2e9851f7d528442bbfe9781 + - path: results/samtools/test.idxstats + md5sum: e179601fa7b8ebce81ac3765206f6c15 + - path: results/samtools/test.stats + md5sum: ca5c3f558faef2cb5b50e5b015dcc231 + - path: results/fastqc + - path: results/multiqc diff --git a/tests/test_no_bai.yml b/tests/test_no_bai.yml new file mode 100644 index 00000000..754e8881 --- /dev/null +++ b/tests/test_no_bai.yml @@ -0,0 +1,39 @@ +- name: Run test profile + command: nextflow run main.nf -profile test,test_no_bai,docker + tags: + - test + - bam + - no_bai + - paired-end + - single-end + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: 15ab79e32e45138d29b6278f4c32a8ca + - path: results/reads/test2_1.merged.fastq.gz + md5sum: d0fd4034e5f07590b0944e3cf24cff60 + - path: results/reads/test2_2.merged.fastq.gz + md5sum: bcb8df2cda7006efb30bbf2c8830e761 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 2c2dbdf1a7584efb6861ad1ac5efbb43 + - path: results/reads/test3_other.fq.gz + md5sum: 274765d73ffe4448503b1bf18f7f4880 + - path: results/samtools/test2.flagstat + md5sum: acbd95030a362a7e6491faa87d1b9c2c + - path: results/samtools/test2.idxstats + md5sum: 90f01313bc89c7fd096aad051cd71b66 + - path: results/samtools/test2.stats + md5sum: f89e29166ff872b38ee72e15d44ae8c3 + - path: results/samtools/test3.flagstat + md5sum: 7b43b975b0a395040bf1ee3c48f058e7 + - path: results/samtools/test3.idxstats + md5sum: 094096b6f303cc18892e1e027c87f273 + - path: results/samtools/test3.stats + md5sum: 5265894a506071133a29ef1ff5fb48b8 + - path: 
results/samtools/test.flagstat + md5sum: a53f3d26e2e9851f7d528442bbfe9781 + - path: results/samtools/test.idxstats + md5sum: e179601fa7b8ebce81ac3765206f6c15 + - path: results/samtools/test.stats + md5sum: f81d34302eec687c43539432e81022d4 + - path: results/fastqc + - path: results/multiqc diff --git a/tests/test_no_crai.yml b/tests/test_no_crai.yml new file mode 100644 index 00000000..bf54a35f --- /dev/null +++ b/tests/test_no_crai.yml @@ -0,0 +1,30 @@ +- name: Run test profile + command: nextflow run main.nf -profile test,test_no_crai,docker + tags: + - test + - cram + - no_crai + - paired-end + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: 15ab79e32e45138d29b6278f4c32a8ca + - path: results/reads/test2_1.merged.fastq.gz + md5sum: d0fd4034e5f07590b0944e3cf24cff60 + - path: results/reads/test2_2.merged.fastq.gz + md5sum: bcb8df2cda7006efb30bbf2c8830e761 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 2c2dbdf1a7584efb6861ad1ac5efbb43 + - path: results/samtools/test2.flagstat + md5sum: acbd95030a362a7e6491faa87d1b9c2c + - path: results/samtools/test2.idxstats + md5sum: 90f01313bc89c7fd096aad051cd71b66 + - path: results/samtools/test2.stats + md5sum: 6e4ad43d69cf225eb4b2bdc11401dd6e + - path: results/samtools/test.flagstat + md5sum: a53f3d26e2e9851f7d528442bbfe9781 + - path: results/samtools/test.idxstats + md5sum: e179601fa7b8ebce81ac3765206f6c15 + - path: results/samtools/test.stats + md5sum: ca5c3f558faef2cb5b50e5b015dcc231 + - path: results/fastqc + - path: results/multiqc diff --git a/tests/test_no_qc.yml b/tests/test_no_qc.yml new file mode 100644 index 00000000..556a2144 --- /dev/null +++ b/tests/test_no_qc.yml @@ -0,0 +1,27 @@ +- name: Run test profile + command: nextflow run main.nf -profile test,docker --no_read_QC + tags: + - test + - no_qc + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: 15ab79e32e45138d29b6278f4c32a8ca + - path: results/reads/test2_1.merged.fastq.gz + md5sum: d0fd4034e5f07590b0944e3cf24cff60 + - 
path: results/reads/test2_2.merged.fastq.gz + md5sum: bcb8df2cda7006efb30bbf2c8830e761 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 2c2dbdf1a7584efb6861ad1ac5efbb43 + - path: results/samtools/test2.flagstat + md5sum: acbd95030a362a7e6491faa87d1b9c2c + - path: results/samtools/test2.idxstats + md5sum: 90f01313bc89c7fd096aad051cd71b66 + - path: results/samtools/test2.stats + md5sum: f89e29166ff872b38ee72e15d44ae8c3 + - path: results/samtools/test.flagstat + md5sum: a53f3d26e2e9851f7d528442bbfe9781 + - path: results/samtools/test.idxstats + md5sum: e179601fa7b8ebce81ac3765206f6c15 + - path: results/samtools/test.stats + md5sum: f81d34302eec687c43539432e81022d4 + - path: results/multiqc diff --git a/tests/test_no_stats.yml b/tests/test_no_stats.yml new file mode 100644 index 00000000..464a0ee6 --- /dev/null +++ b/tests/test_no_stats.yml @@ -0,0 +1,16 @@ +- name: Run test profile + command: nextflow run main.nf -profile test,docker --no_stats + tags: + - test + - no_stats + files: + - path: results/reads/test_1.merged.fastq.gz + md5sum: 15ab79e32e45138d29b6278f4c32a8ca + - path: results/reads/test2_1.merged.fastq.gz + md5sum: d0fd4034e5f07590b0944e3cf24cff60 + - path: results/reads/test2_2.merged.fastq.gz + md5sum: bcb8df2cda7006efb30bbf2c8830e761 + - path: results/reads/test_2.merged.fastq.gz + md5sum: 2c2dbdf1a7584efb6861ad1ac5efbb43 + - path: results/fastqc + - path: results/multiqc diff --git a/workflows/bamtofastq.nf b/workflows/bamtofastq.nf index 40dc3ae7..8a6ad283 100644 --- a/workflows/bamtofastq.nf +++ b/workflows/bamtofastq.nf @@ -27,11 +27,8 @@ if (params.input) { ch_input = extract_csv(file(params.input, checkIfExists: tru fasta = params.fasta ? Channel.fromPath(params.fasta).collect() : Channel.value([]) fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.value([]) -// Initialize value based on input -index_provided = ch_input.map{it -> it[2]} == [] ? 
true : false - // Initialize value channels based on params -chr = params.chr ?: Channel.empty() +chr = params.chr ?: Channel.empty() /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -103,15 +100,15 @@ workflow BAMTOFASTQ { // SUBWORKFLOW: Prepare indices bai/crai/fai if not provided PREPARE_INDICES( - index_provided, ch_input, fasta ) + ch_versions = ch_versions.mix(PREPARE_INDICES.out.versions) - fasta_fai = params.fasta ? params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : PREPARE_INDICES.out.fasta_fai : [] + fasta_fai = params.fasta ? params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : PREPARE_INDICES.out.fasta_fai : [] - ch_input = index_provided ? ch_input : PREPARE_INDICES.out.ch_input + ch_input = PREPARE_INDICES.out.ch_input_indexed // SUBWORKFLOW: Pre conversion QC and stats @@ -122,24 +119,49 @@ workflow BAMTOFASTQ { ch_versions = ch_versions.mix(PRE_CONVERSION_QC.out.versions) + // MODULE: Check if SINGLE or PAIRED-END + + CHECK_IF_PAIRED_END(ch_input, fasta) + + ch_paired_end = ch_input.join(CHECK_IF_PAIRED_END.out.paired_end) + ch_single_end = ch_input.join(CHECK_IF_PAIRED_END.out.single_end) + + // Combine channels into new input channel for conversion + add info about single/paired to meta map + ch_input_new = ch_single_end.map{ meta, bam, bai, txt -> + [ [ id : meta.id, + filetype : meta.filetype, + single_end : true ], + bam, + bai + ] }.mix(ch_paired_end.map{ meta, bam, bai, txt -> + [ [ id : meta.id, + filetype : meta.filetype, + single_end : false ], + bam, + bai + ] }) + + ch_versions = ch_versions.mix(CHECK_IF_PAIRED_END.out.versions) + + // Extract only reads mapping to a chromosome if (params.chr) { - SAMTOOLS_CHR(ch_input, fasta, []) + SAMTOOLS_CHR(ch_input_new, fasta, []) - samtools_chr_out = Channel.empty().mix(SAMTOOLS_CHR.out.bam, + samtools_chr_out = Channel.empty().mix( SAMTOOLS_CHR.out.bam, SAMTOOLS_CHR.out.cram) SAMTOOLS_CHR_INDEX(samtools_chr_out) - ch_input 
= samtools_chr_out.join(Channel.empty().mix(SAMTOOLS_CHR_INDEX.out.bai, - SAMTOOLS_CHR_INDEX.out.crai)) - + ch_input_chr = samtools_chr_out.join(Channel.empty().mix( SAMTOOLS_CHR_INDEX.out.bai, + SAMTOOLS_CHR_INDEX.out.crai )) // Add chr names to id - ch_input = ch_input.map{ it -> + ch_input_new = ch_input_chr.map{ it -> new_id = it[1].baseName [[ id : new_id, - filetype : it[0].filetype + filetype : it[0].filetype, + single_end: it[0].single_end ], it[1], it[2]] } @@ -149,21 +171,18 @@ workflow BAMTOFASTQ { } - // MODULE: Check if SINLGE or PAIRED-END - - CHECK_IF_PAIRED_END(ch_input, fasta) - - ch_paired_end = ch_input.join(CHECK_IF_PAIRED_END.out.paired_end) - ch_single_end = ch_input.join(CHECK_IF_PAIRED_END.out.single_end) - - ch_versions = ch_versions.mix(CHECK_IF_PAIRED_END.out.versions) - // MODULE: SINGLE-END Alignment to FastQ (SortExtractSingleEnd) def interleave = false + ch_input_new.branch{ + ch_single: it[0].single_end == true + ch_paired: it[0].single_end == false + }.set{conversion_input} + + // Module needs info about single-endedness SAMTOOLS_COLLATEFASTQ_SINGLE_END( - ch_single_end.map{it -> [it[0], it[1]]}, // meta, bam - fasta.map{ it -> // meta, fasta + conversion_input.ch_single.map{ it -> [ it[0], it[1] ]}, // meta, bam/cram + fasta.map{ it -> // meta, fasta def new_id = "" if(it) { new_id = it[0].baseName @@ -178,7 +197,7 @@ workflow BAMTOFASTQ { // ALIGNMENT_TO_FASTQ ( - ch_paired_end.map{it -> [it[0], it[1], it[2]]}, // meta, file, index + conversion_input.ch_paired, fasta, fasta_fai ) @@ -256,33 +275,27 @@ def extract_csv(csv_file) { def line, numberOfLinesInSampleSheet = 0; while ((line = reader.readLine()) != null) {numberOfLinesInSampleSheet++} if (numberOfLinesInSampleSheet < 2) { - log.error "Samplesheet had less than two lines. The sample sheet must be a csv file with a header, so at least two lines." - System.exit(1) + error("Samplesheet had less than two lines. 
The sample sheet must be a csv file with a header, so at least two lines.") } } Channel.from(csv_file).splitCsv(header: true) .map{ row -> if ( !row.sample_id ) { // This also handles the case where the lane is left as an empty string - log.error('The sample sheet should specify a sample_id for each row.\n' + row.toString()) - System.exit(1) + error('The sample sheet should specify a sample_id for each row.\n' + row.toString()) } if ( !row.mapped ) { // This also handles the case where the lane is left as an empty string - log.error('The sample sheet should specify a mapped file for each row.\n' + row.toString()) - System.exit(1) + error('The sample sheet should specify a mapped file for each row.\n' + row.toString()) } if (!row.file_type) { // This also handles the case where the lane is left as an empty string - log.error('The sample sheet should specify a file_type for each row, valid values are bam/cram.\n' + row.toString()) - System.exit(1) + error('The sample sheet should specify a file_type for each row, valid values are bam/cram.\n' + row.toString()) } if (!(row.file_type == "bam" || row.file_type == "cram")) { - log.error('The file_type for the row below is neither "bam" nor "cram". Please correct this.\n' + row.toString() ) - System.exit(1) + error('The file_type for the row below is neither "bam" nor "cram". Please correct this.\n' + row.toString() ) } if (row.file_type != file(row.mapped).getExtension().toString()) { - log.error('The file extension does not fit the specified file_type.\n' + row.toString() ) - System.exit(1) + error('The file extension does not fit the specified file_type.\n' + row.toString() ) } - + // init meta map def meta = [:] @@ -291,6 +304,7 @@ def extract_csv(csv_file) { def mapped = file(row.mapped, checkIfExists: true) def index = row.index ? file(row.index, checkIfExists: true) : [] meta.filetype = "${row.file_type}".toString() + meta.index = row.index ? true : false return [meta, mapped, index]