nf-core · SusiJo · Aug 4, 2023 · Aug 2, 2023 · adamrtalbot · Aug 4, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Changed`
 
+- [#63](https://github.com/nf-core/bamtofastq/pull/63) Replace `extract_csv` with the nf-validation plugin's `fromSamplesheet`.
+
 ### `Fixed`
 
 - [#62](https://github.com/nf-core/bamtofastq/pull/62) Adjust subway map for dark mode.

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
@@ -1,7 +1,7 @@
 report_comment: >
-  This report has been generated by the <a href="https://github.com/nf-core/bamtofastq" target="_blank">nf-core/bamtofastq</a>
+  This report has been generated by the <a href="https://github.com/nf-core/bamtofastq/tree/dev" target="_blank">nf-core/bamtofastq</a>
   analysis pipeline. For information about how to interpret these results, please see the
-  <a href="https://nf-co.re/bamtofastq/2.0.0/docs/output" target="_blank">documentation</a>.
+  <a href="https://nf-co.re/bamtofastq/dev/docs/output" target="_blank">documentation</a>.
 report_section_order:
   "nf-core-bamtofastq-methods-description":
     order: -1000

diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -7,30 +7,33 @@
     "items": {
         "type": "object",
         "properties": {
-            "sample": {
+            "sample_id": {
                 "type": "string",
                 "pattern": "^\\S+$",
-                "errorMessage": "Sample name must be provided and cannot contain spaces"
+                "errorMessage": "The sample sheet should specify a sample_id for each row which cannot contain spaces.",
+                "meta": ["id"]
             },
-            "fastq_1": {
+            "mapped": {
                 "type": "string",
-                "pattern": "^\\S+\\.f(ast)?q\\.gz$",
-                "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+                "format": "file-path",
+                "exists": true,
+                "pattern": "^\\S+\\.(bam|cram)$",
+                "errorMessage": "The sample sheet should specify a mapped file for each row, either '.bam' or '.cram'"
             },
-            "fastq_2": {
-                "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.f(ast)?q\\.gz$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ]
+            "index": {
+                "type": "string",
+                "format": "file-path",
+                "exists": true,
+                "pattern": "^\\S+\\.(bai|crai)$",
+                "errorMessage": "Index files can be provided but must have the extension '.bai' or '.crai'"
+            },
+            "file_type": {
+                "type": "string",
+                "errorMessage": "Input filetypes must either be 'bam' or 'cram'",
+                "meta": ["filetype"],
+                "enum": ["bam", "cram"]
             }
         },
-        "required": ["sample", "fastq_1"]
+        "required": ["sample_id", "mapped", "file_type"]
     }
 }
diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -20,7 +20,8 @@
                     "pattern": "^\\S+\\.csv$",
                     "description": "Path to comma-separated file containing information about the samples in the experiment.",
                     "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/bamtofastq/usage#samplesheet-input).",
-                    "fa_icon": "fas fa-file-csv"
+                    "fa_icon": "fas fa-file-csv",
+                    "schema": "assets/schema_input.json"
                 },
                 "outdir": {
                     "type": "string",

diff --git a/workflows/bamtofastq.nf b/workflows/bamtofastq.nf
@@ -4,7 +4,7 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation'
+include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation'
 
 def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
 def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
@@ -16,8 +16,18 @@ log.info logo + paramsSummaryLog(workflow) + citation
 WorkflowBamtofastq.initialise(params, log)
 
 // Check mandatory parameters
-ch_input = extract_csv(file(params.input))
+ch_input = Channel.fromSamplesheet("input")
+            .map{ meta, mapped, index ->
 
+            // The declared file_type (meta.filetype) must match the extension of the mapped file
+            if (meta.filetype != mapped.getExtension().toString()) {
+                error('The file extension does not fit the specified file_type.\n' + mapped.toString() )
+            }
+
+            // Record whether an index was supplied on a copy of meta instead of mutating the emitted map
+            return [meta + [index: index ? true : false], mapped, index]
+
+            }
 
 // Initialize file channels based on params
 fasta     = params.fasta     ? Channel.fromPath(params.fasta).collect()      : Channel.value([])
@@ -255,50 +265,6 @@ workflow.onComplete {
     FUNCTIONS
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
-// Function to extract information (meta data + file(s)) from csv file(s)
-def extract_csv(csv_file) {
-
-    // check that the sample sheet is not 1 line or less, because it'll skip all subsequent checks if so.
-    file(csv_file).withReader('UTF-8') { reader ->
-        def line, numberOfLinesInSampleSheet = 0;
-        while ((line = reader.readLine()) != null) {numberOfLinesInSampleSheet++}
-        if (numberOfLinesInSampleSheet < 2) {
-            error("Samplesheet had less than two lines. The sample sheet must be a csv file with a header, so at least two lines.")
-        }
-    }
-    Channel.from(csv_file).splitCsv(header: true)
-        .map{ row ->
-            if ( !row.sample_id ) {  // This also handles the case where the lane is left as an empty string
-                error('The sample sheet should specify a sample_id for each row.\n' + row.toString())
-            }
-            if ( !row.mapped ) {  // This also handles the case where the lane is left as an empty string
-                error('The sample sheet should specify a mapped file for each row.\n' + row.toString())
-            }
-            if (!row.file_type) {  // This also handles the case where the lane is left as an empty string
-                error('The sample sheet should specify a file_type for each row, valid values are bam/cram.\n' + row.toString())
-            }
-            if (!(row.file_type == "bam" || row.file_type == "cram")) {
-                error('The file_type for the row below is neither "bam" nor "cram". Please correct this.\n' + row.toString() )
-            }
-            if (row.file_type != file(row.mapped).getExtension().toString()) {
-                error('The file extension does not fit the specified file_type.\n' + row.toString() )
-            }
-
-
-            // init meta map
-            def meta = [:]
-
-            meta.id       = "${row.sample_id}".toString()
-            def mapped    = file(row.mapped, checkIfExists: true)
-            def index     = row.index ? file(row.index, checkIfExists: true) : []
-            meta.filetype = "${row.file_type}".toString()
-            meta.index    = row.index ? true : false
-
-            return [meta, mapped, index]
-
-            }
-
-}
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~