From 45f1dd7798a181417c91c355a81f5f2ec8d7976f Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 2 Aug 2023 15:23:22 +0200 Subject: [PATCH] add nf-validation plugin --- CHANGELOG.md | 2 ++ assets/multiqc_config.yml | 4 +-- assets/schema_input.json | 39 ++++++++++++++------------ nextflow_schema.json | 3 +- workflows/bamtofastq.nf | 58 ++++++++------------------------------- 5 files changed, 39 insertions(+), 67 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b39427c2..edc1a37f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#63](https://github.com/nf-core/bamtofastq/pull/63) Replace extract_csv with nf-validation plugin + ### `Fixed` - [#62](https://github.com/nf-core/bamtofastq/pull/62) Adjust subway map for dark mode. diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 70f11347..aafa3040 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/bamtofastq + This report has been generated by the nf-core/bamtofastq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-bamtofastq-methods-description": order: -1000 diff --git a/assets/schema_input.json b/assets/schema_input.json index c22a9b4b..f5b99abf 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -7,30 +7,33 @@ "items": { "type": "object", "properties": { - "sample": { + "sample_id": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "The sample sheet should specify a sample_id for each row which cannot contain spaces.", + "meta": ["id"] }, - "fastq_1": { + "mapped": { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.(bam|cram)$", + "errorMessage": "The sample sheet should specify a mapped file for each row, either '.bam' or '.cram'" }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "index": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.(bai|crai)$", + "errorMessage": "Index files can be provided but must have the extension '.bai' or '.crai'" + }, + "file_type": { + "type": "string", + "errorMessage": "Input filetypes must either be 'bam' or 'cram'", + "meta": ["filetype"], + "enum": ["bam", "cram"] } }, - "required": ["sample", "fastq_1"] + "required": ["sample_id", "mapped", "file_type"] } } diff --git a/nextflow_schema.json b/nextflow_schema.json index 3155f2b1..bd6751e8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -20,7 +20,8 @@ "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", 
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/bamtofastq/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" + "fa_icon": "fas fa-file-csv", + "schema": "assets/schema_input.json" }, "outdir": { "type": "string", diff --git a/workflows/bamtofastq.nf b/workflows/bamtofastq.nf index ff1ca50e..dbd90e78 100644 --- a/workflows/bamtofastq.nf +++ b/workflows/bamtofastq.nf @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) def citation = '\n' + WorkflowMain.citation(workflow) + '\n' @@ -16,8 +16,18 @@ log.info logo + paramsSummaryLog(workflow) + citation WorkflowBamtofastq.initialise(params, log) // Check mandatory parameters -ch_input = extract_csv(file(params.input)) +ch_input = Channel.fromSamplesheet("input") + .map{ meta, mapped, index -> + if (meta.filetype != mapped.getExtension().toString()) { + error('The file extension does not fit the specified file_type.\n' + mapped.toString() ) + } + + meta.index = index ? true : false + + return [meta, mapped, index] + + } // Initialize file channels based on params fasta = params.fasta ? Channel.fromPath(params.fasta).collect() : Channel.value([]) @@ -255,50 +265,6 @@ workflow.onComplete { FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Function to extract information (meta data + file(s)) from csv file(s) -def extract_csv(csv_file) { - - // check that the sample sheet is not 1 line or less, because it'll skip all subsequent checks if so. 
- file(csv_file).withReader('UTF-8') { reader -> - def line, numberOfLinesInSampleSheet = 0; - while ((line = reader.readLine()) != null) {numberOfLinesInSampleSheet++} - if (numberOfLinesInSampleSheet < 2) { - error("Samplesheet had less than two lines. The sample sheet must be a csv file with a header, so at least two lines.") - } - } - Channel.from(csv_file).splitCsv(header: true) - .map{ row -> - if ( !row.sample_id ) { // This also handles the case where the lane is left as an empty string - error('The sample sheet should specify a sample_id for each row.\n' + row.toString()) - } - if ( !row.mapped ) { // This also handles the case where the lane is left as an empty string - error('The sample sheet should specify a mapped file for each row.\n' + row.toString()) - } - if (!row.file_type) { // This also handles the case where the lane is left as an empty string - error('The sample sheet should specify a file_type for each row, valid values are bam/cram.\n' + row.toString()) - } - if (!(row.file_type == "bam" || row.file_type == "cram")) { - error('The file_type for the row below is neither "bam" nor "cram". Please correct this.\n' + row.toString() ) - } - if (row.file_type != file(row.mapped).getExtension().toString()) { - error('The file extension does not fit the specified file_type.\n' + row.toString() ) - } - - - // init meta map - def meta = [:] - - meta.id = "${row.sample_id}".toString() - def mapped = file(row.mapped, checkIfExists: true) - def index = row.index ? file(row.index, checkIfExists: true) : [] - meta.filetype = "${row.file_type}".toString() - meta.index = row.index ? true : false - - return [meta, mapped, index] - - } - -} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~