
Add option --fix to update the meta.yml file of subworkflows. #3077

Open · wants to merge 13 commits into dev
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -67,6 +67,7 @@
- The `modules_nfcore` tag in the `main.nf.test` file of modules/subworkflows now displays the organization name in custom modules repositories ([#3005](https://github.com/nf-core/tools/pull/3005))
- Add `--migrate_pytest` option to `nf-core <modules|subworkflows> test` command ([#3085](https://github.com/nf-core/tools/pull/3085))
- Components: allow spaces at the beginning of include statements ([#3115](https://github.com/nf-core/tools/pull/3115))
- Add option `--fix` to update the `meta.yml` file of subworkflows ([#3077](https://github.com/nf-core/tools/pull/3077))

### General

7 changes: 5 additions & 2 deletions nf_core/__main__.py
@@ -1479,11 +1479,14 @@ def command_subworkflows_list_local(ctx, keywords, json, directory): # pylint:
help="Sort lint output by subworkflow or test name.",
show_default=True,
)
def command_subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by):
@click.option("--fix", is_flag=True, help="Fix all linting tests if possible.")
def command_subworkflows_lint(
ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by, fix
):
"""
Lint one or more subworkflows in a directory.
"""
subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by)
subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by, fix)
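
With this wiring, the new flag is exposed on the command line, e.g. `nf-core subworkflows lint --fix` (optionally naming a single subworkflow), and the `fix` value is forwarded through `subworkflows_lint` to `SubworkflowLint` as shown in the next file.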


# nf-core subworkflows info
3 changes: 2 additions & 1 deletion nf_core/commands_subworkflows.py
@@ -104,7 +104,7 @@ def subworkflows_list_local(ctx, keywords, json, directory): # pylint: disable=
sys.exit(1)


def subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by):
def subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by, fix):
"""
Lint one or more subworkflows in a directory.

@@ -121,6 +121,7 @@ def subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warn
subworkflow_lint = SubworkflowLint(
directory,
fail_warned=fail_warned,
fix=fix,
registry=ctx.params["registry"],
remote_url=ctx.obj["modules_repo_url"],
branch=ctx.obj["modules_repo_branch"],
53 changes: 39 additions & 14 deletions nf_core/components/info.py
@@ -280,14 +280,26 @@ def generate_component_info_help(self):
inputs_table = Table(expand=True, show_lines=True, box=box.MINIMAL_HEAVY_HEAD, padding=0)
inputs_table.add_column(":inbox_tray: Inputs")
inputs_table.add_column("Description")
inputs_table.add_column("Pattern", justify="right", style="green")
if self.component_type == "modules":
inputs_table.add_column("Pattern", justify="right", style="green")
elif self.component_type == "subworkflows":
inputs_table.add_column("Structure", justify="right", style="green")
for input in self.meta["input"]:
for key, info in input.items():
inputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
if self.component_type == "modules":
for element in input:
for key, info in element.items():
inputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
elif self.component_type == "subworkflows":
for key, info in input.items():
inputs_table.add_row(
f"[orange1 on black] {key} [/][dim i]",
Markdown(info["description"] if info["description"] else ""),
info.get("structure", ""),
)

renderables.append(inputs_table)

@@ -296,14 +308,27 @@
outputs_table = Table(expand=True, show_lines=True, box=box.MINIMAL_HEAVY_HEAD, padding=0)
outputs_table.add_column(":outbox_tray: Outputs")
outputs_table.add_column("Description")
outputs_table.add_column("Pattern", justify="right", style="green")
if self.component_type == "modules":
outputs_table.add_column("Pattern", justify="right", style="green")
elif self.component_type == "subworkflows":
outputs_table.add_column("Structure", justify="right", style="green")
Contributor (commenting on lines +311 to +314):
Because this and the following are basically the same as the inputs table, can't we just have a generic generate_params_table function to reduce code duplication?
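
A rough sketch of what such a shared helper could look like (hypothetical, not part of this PR; callers would pre-flatten the nested meta structures into `(name, info)` pairs):

```python
def generate_params_table(self, title: str, rows: list) -> Table:
    """Hypothetical shared renderer for module/subworkflow inputs and outputs."""
    table = Table(expand=True, show_lines=True, box=box.MINIMAL_HEAVY_HEAD, padding=0)
    table.add_column(title)
    table.add_column("Description")
    # Modules document a file "Pattern"; subworkflows document a channel "Structure".
    meta_key = "pattern" if self.component_type == "modules" else "structure"
    table.add_column(meta_key.capitalize(), justify="right", style="green")
    for key, info in rows:
        type_str = f" ({info['type']})" if self.component_type == "modules" else ""
        table.add_row(
            f"[orange1 on black] {key} [/][dim i]{type_str}",
            Markdown(info.get("description") or ""),
            info.get(meta_key, ""),
        )
    return table
```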

for output in self.meta["output"]:
for key, info in output.items():
outputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
if self.component_type == "modules":
for ch_name, elements in output.items():
for element in elements:
for key, info in element.items():
outputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
elif self.component_type == "subworkflows":
for key, info in output.items():
outputs_table.add_row(
f"[orange1 on black] {key} [/][dim i]",
Markdown(info["description"] if info["description"] else ""),
info.get("structure", ""),
)

renderables.append(outputs_table)

143 changes: 87 additions & 56 deletions nf_core/components/nfcore_component.py
@@ -5,7 +5,7 @@
import logging
import re
from pathlib import Path
from typing import List, Optional, Tuple, Union
from typing import Any, List, Optional, Tuple, Union

log = logging.getLogger(__name__)

@@ -41,6 +41,7 @@ def __init__(
remote_component (bool): Whether the module is to be treated as a
nf-core or local component
"""
self.component_type = component_type
self.component_name = component_name
self.repo_url = repo_url
self.component_dir = component_dir
@@ -170,65 +171,95 @@ def _get_included_components_in_chained_tests(self, main_nf_test: Union[Path, st

def get_inputs_from_main_nf(self) -> None:
"""Collect all inputs from the main.nf file."""
inputs: list[list[dict[str, dict[str, str]]]] = []
inputs: Any = [] # Can be 'list[list[dict[str, dict[str, str]]]]' or 'list[str]'
Contributor:

Suggested change:
- inputs: Any = []  # Can be 'list[list[dict[str, dict[str, str]]]]' or 'list[str]'
+ inputs: Union[List[List[Dict[str, Dict[str, str]]]], List[str]] = []

Member Author:
This wasn't working, because we append elements of one of the types later on :( I learnt about the overload decorator, but in our case it's not a function.
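
A minimal sketch (mine, not from this thread) of why the suggested `Union` annotation fails: mypy checks each method call against both members of the `Union`, so the module branch's appends are rejected by the `List[str]` member and the subworkflow branch's appends by the nested-list member:

```python
from typing import Dict, List, Union

inputs: Union[List[List[Dict[str, Dict[str, str]]]], List[str]] = []
inputs.append("ch_reads")      # mypy error: not valid for the nested-list member
inputs.append([{"meta": {}}])  # mypy error: not valid for the List[str] member
```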

with open(self.main_nf) as f:
data = f.read()
# get input values from main.nf after "input:", which can be formatted as tuple val(foo) path(bar) or val foo or val bar or path bar or path foo
# regex matches:
# val(foo)
# path(bar)
# val foo
# val bar
# path bar
# path foo
# don't match anything inside comments or after "output:"
if "input:" not in data:
log.debug(f"Could not find any inputs in {self.main_nf}")
return
input_data = data.split("input:")[1].split("output:")[0]
for line in input_data.split("\n"):
channel_elements: list[dict[str, dict[str, str]]] = []
regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
matches = re.finditer(regex, line)
for _, match in enumerate(matches, start=1):
input_val = None
if match.group(3):
input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
elif match.group(4):
input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
if input_val:
channel_elements.append({input_val: {}})
if len(channel_elements) > 0:
inputs.append(channel_elements)
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs
if self.component_type == "modules":
# get input values from main.nf after "input:", which can be formatted as tuple val(foo) path(bar) or val foo or val bar or path bar or path foo
# regex matches:
# val(foo)
# path(bar)
# val foo
# val bar
# path bar
# path foo
# don't match anything inside comments or after "output:"
if "input:" not in data:
log.debug(f"Could not find any inputs in {self.main_nf}")
return
input_data = data.split("input:")[1].split("output:")[0]
for line in input_data.split("\n"):
channel_elements: Any = []
regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
matches = re.finditer(regex, line)
for _, match in enumerate(matches, start=1):
input_val = None
if match.group(3):
input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
elif match.group(4):
input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
if input_val:
channel_elements.append({input_val: {}})
if len(channel_elements) > 0:
inputs.append(channel_elements)
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs
elif self.component_type == "subworkflows":
# get input values from main.nf after "take:"
if "take:" not in data:
log.debug(f"Could not find any inputs in {self.main_nf}")
return
# get all lines between "take" and "main" or "emit"
input_data = data.split("take:")[1].split("main:")[0].split("emit:")[0]
for line in input_data.split("\n"):
try:
inputs.append(line.split()[0])
except IndexError:
pass # Empty lines
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs
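
As a quick sanity check (my own snippet, not part of the diff), the input-parsing regex above behaves as follows on representative `main.nf` input lines:

```python
import re

regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
for line in [
    "tuple val(meta), path(reads)",
    "path fasta",
    'path(files, stageAs: "inputs/*")',
]:
    for m in re.finditer(regex, line):
        # group(3) is the parenthesised form, group(4) the bare form;
        # split(",") drops qualifiers such as `stageAs:`
        name = (m.group(3) or m.group(4)).split(",")[0]
        print(m.group(1), name)
# prints: val meta / path reads / path fasta / path files
```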

def get_outputs_from_main_nf(self):
outputs = []
with open(self.main_nf) as f:
data = f.read()
# get output values from main.nf after "output:". the names are always after "emit:"
if "output:" not in data:
log.debug(f"Could not find any outputs in {self.main_nf}")
return outputs
output_data = data.split("output:")[1].split("when:")[0]
regex_emit = r"emit:\s*([^)\s,]+)"
regex_elements = r"(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
for line in output_data.split("\n"):
match_emit = re.search(regex_emit, line)
matches_elements = re.finditer(regex_elements, line)
if not match_emit:
continue
output_channel = {match_emit.group(1): []}
for _, match_element in enumerate(matches_elements, start=1):
output_val = None
if match_element.group(3):
output_val = match_element.group(3)
elif match_element.group(4):
output_val = match_element.group(4)
if output_val:
output_val = output_val.strip("'").strip('"') # remove quotes
output_channel[match_emit.group(1)].append({output_val: {}})
outputs.append(output_channel)
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
self.outputs = outputs
if self.component_type == "modules":
# get output values from main.nf after "output:". the names are always after "emit:"
if "output:" not in data:
log.debug(f"Could not find any outputs in {self.main_nf}")
return outputs
output_data = data.split("output:")[1].split("when:")[0]
regex_emit = r"emit:\s*([^)\s,]+)"
regex_elements = r"(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
for line in output_data.split("\n"):
match_emit = re.search(regex_emit, line)
matches_elements = re.finditer(regex_elements, line)
if not match_emit:
continue
output_channel = {match_emit.group(1): []}
for _, match_element in enumerate(matches_elements, start=1):
output_val = None
if match_element.group(3):
output_val = match_element.group(3)
elif match_element.group(4):
output_val = match_element.group(4)
if output_val:
output_val = output_val.strip("'").strip('"') # remove quotes
output_channel[match_emit.group(1)].append({output_val: {}})
outputs.append(output_channel)
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
self.outputs = outputs
elif self.component_type == "subworkflows":
# get output values from main.nf after "emit:". Can be named outputs or not.
if "emit:" not in data:
log.debug(f"Could not find any outputs in {self.main_nf}")
return outputs
output_data = data.split("emit:")[1].split("}")[0]
for line in output_data.split("\n"):
try:
outputs.append(line.split("=")[0].split()[0])
except IndexError:
# Empty lines
pass
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
self.outputs = outputs
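
For illustration (again my own snippet), the emit regexes above turn a typical module output line into the nested channel structure stored in `self.outputs`:

```python
import re

line = 'tuple val(meta), path("*.bam"), emit: bam'
match_emit = re.search(r"emit:\s*([^)\s,]+)", line)
elements = re.finditer(r"(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))", line)
channel = {
    match_emit.group(1): [
        {(m.group(3) or m.group(4)).strip("'").strip('"'): {}} for m in elements
    ]
}
print(channel)  # {'bam': [{'meta': {}}, {'*.bam': {}}]}
```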
10 changes: 5 additions & 5 deletions nf_core/module-template/meta.yml
@@ -53,11 +53,6 @@ input:
## TODO nf-core: Add a description of all of the variables used as output
{% endif -%}
output:
- versions:
- "versions.yml":
type: file
description: File containing software versions
pattern: "versions.yml"
- {{ 'bam:' if not_empty_template else "output:" }}
#{% if has_meta -%} Only when we have meta
- meta:
@@ -81,6 +76,11 @@ output:
{% else -%}
- edam: ""
{%- endif %}
- versions:
- "versions.yml":
type: file
description: File containing software versions
pattern: "versions.yml"

authors:
- "{{ author }}"
17 changes: 17 additions & 0 deletions nf_core/modules/lint/meta_yml.py
@@ -95,6 +95,23 @@ def meta_yml(module_lint_object: ComponentLint, module: NFCoreComponent) -> None

# Confirm that all input and output channels are correctly specified
if valid_meta_yml:
# confirm that the name matches the process name in main.nf
if meta_yaml["name"].upper() == module.process_name:
module.passed.append(
(
"meta_name",
"Correct name specified in `meta.yml`.",
module.meta_yml,
)
)
else:
module.failed.append(
(
"meta_name",
f"Conflicting `process` name between meta.yml (`{meta_yaml['name']}`) and main.nf (`{module.process_name}`)",
module.meta_yml,
)
)
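
A worked example of this check (module name hypothetical): a module whose `main.nf` declares `process SAMTOOLS_SORT` passes only if `meta.yml` names the lowercase equivalent, since the comparison uppercases the YAML value:

```python
meta_yaml = {"name": "samtools_sort"}  # hypothetical meta.yml content
process_name = "SAMTOOLS_SORT"         # hypothetical name parsed from main.nf
assert meta_yaml["name"].upper() == process_name  # "meta_name" lint test passes
```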
# Check that inputs are specified in meta.yml
if len(module.inputs) > 0 and "input" not in meta_yaml:
module.failed.append(
4 changes: 2 additions & 2 deletions nf_core/pipeline-template/modules.json
@@ -8,12 +8,12 @@
{%- if fastqc %}
"fastqc": {
"branch": "master",
"git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
}{% endif %}{%- if multiqc %}{% if fastqc %},{% endif %}
"multiqc": {
"branch": "master",
"git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
}
{%- endif %}
5 changes: 4 additions & 1 deletion nf_core/pipeline-template/modules/nf-core/fastqc/main.nf

(Generated file; diff not rendered by default.)