
Add option --fix to update the meta.yml file of subworkflows. #3077

Open · wants to merge 13 commits into dev
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -67,6 +67,7 @@
- The `modules_nfcore` tag in the `main.nf.test` file of modules/subworkflows now displays the organization name in custom modules repositories ([#3005](https://github.com/nf-core/tools/pull/3005))
- Add `--migrate_pytest` option to `nf-core <modules|subworkflows> test` command ([#3085](https://github.com/nf-core/tools/pull/3085))
- Components: allow spaces at the beginning of include statements ([#3115](https://github.com/nf-core/tools/pull/3115))
- Add option `--fix` to update the `meta.yml` file of subworkflows ([#3077](https://github.com/nf-core/tools/pull/3077))

### General

7 changes: 5 additions & 2 deletions nf_core/__main__.py
@@ -1479,11 +1479,14 @@ def command_subworkflows_list_local(ctx, keywords, json, directory): # pylint:
help="Sort lint output by subworkflow or test name.",
show_default=True,
)
def command_subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by):
@click.option("--fix", is_flag=True, help="Fix all linting tests if possible.")
def command_subworkflows_lint(
ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by, fix
):
"""
Lint one or more subworkflows in a directory.
"""
subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by)
subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by, fix)
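
With this wiring, the new flag is exposed on the command line, e.g. `nf-core subworkflows lint --fix` (optionally naming a single subworkflow), and the `fix` value is forwarded through `subworkflows_lint` to `SubworkflowLint` as shown in the next file.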


# nf-core subworkflows info
3 changes: 2 additions & 1 deletion nf_core/commands_subworkflows.py
@@ -104,7 +104,7 @@ def subworkflows_list_local(ctx, keywords, json, directory): # pylint: disable=
sys.exit(1)


def subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by):
def subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warned, local, passed, sort_by, fix):
"""
Lint one or more subworkflows in a directory.

@@ -121,6 +121,7 @@ def subworkflows_lint(ctx, subworkflow, directory, registry, key, all, fail_warn
subworkflow_lint = SubworkflowLint(
directory,
fail_warned=fail_warned,
fix=fix,
registry=ctx.params["registry"],
remote_url=ctx.obj["modules_repo_url"],
branch=ctx.obj["modules_repo_branch"],
53 changes: 39 additions & 14 deletions nf_core/components/info.py
@@ -280,14 +280,26 @@ def generate_component_info_help(self):
inputs_table = Table(expand=True, show_lines=True, box=box.MINIMAL_HEAVY_HEAD, padding=0)
inputs_table.add_column(":inbox_tray: Inputs")
inputs_table.add_column("Description")
inputs_table.add_column("Pattern", justify="right", style="green")
if self.component_type == "modules":
inputs_table.add_column("Pattern", justify="right", style="green")
elif self.component_type == "subworkflows":
inputs_table.add_column("Structure", justify="right", style="green")
for input in self.meta["input"]:
for key, info in input.items():
inputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
if self.component_type == "modules":
for element in input:
for key, info in element.items():
inputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
elif self.component_type == "subworkflows":
for key, info in input.items():
inputs_table.add_row(
f"[orange1 on black] {key} [/][dim i]",
Markdown(info["description"] if info["description"] else ""),
info.get("structure", ""),
)

renderables.append(inputs_table)

@@ -296,14 +308,27 @@
outputs_table = Table(expand=True, show_lines=True, box=box.MINIMAL_HEAVY_HEAD, padding=0)
outputs_table.add_column(":outbox_tray: Outputs")
outputs_table.add_column("Description")
outputs_table.add_column("Pattern", justify="right", style="green")
if self.component_type == "modules":
outputs_table.add_column("Pattern", justify="right", style="green")
elif self.component_type == "subworkflows":
outputs_table.add_column("Structure", justify="right", style="green")
Contributor (commenting on lines +311 to +314):
Because this and the following are basically the same as the inputs table, can't we just have a generic generate_params_table function to reduce code duplication?
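
A rough sketch of what such a shared helper could look like (hypothetical, not part of this PR; callers would pre-flatten the nested meta structures into `(name, info)` pairs):

```python
def generate_params_table(self, title: str, rows: list) -> Table:
    """Hypothetical shared renderer for module/subworkflow inputs and outputs."""
    table = Table(expand=True, show_lines=True, box=box.MINIMAL_HEAVY_HEAD, padding=0)
    table.add_column(title)
    table.add_column("Description")
    # Modules document a file "Pattern"; subworkflows document a channel "Structure".
    meta_key = "pattern" if self.component_type == "modules" else "structure"
    table.add_column(meta_key.capitalize(), justify="right", style="green")
    for key, info in rows:
        type_str = f" ({info['type']})" if self.component_type == "modules" else ""
        table.add_row(
            f"[orange1 on black] {key} [/][dim i]{type_str}",
            Markdown(info.get("description") or ""),
            info.get(meta_key, ""),
        )
    return table
```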

for output in self.meta["output"]:
for key, info in output.items():
outputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
if self.component_type == "modules":
for ch_name, elements in output.items():
for element in elements:
for key, info in element.items():
outputs_table.add_row(
f"[orange1 on black] {key} [/][dim i] ({info['type']})",
Markdown(info["description"] if info["description"] else ""),
info.get("pattern", ""),
)
elif self.component_type == "subworkflows":
for key, info in output.items():
outputs_table.add_row(
f"[orange1 on black] {key} [/][dim i]",
Markdown(info["description"] if info["description"] else ""),
info.get("structure", ""),
)

renderables.append(outputs_table)

143 changes: 87 additions & 56 deletions nf_core/components/nfcore_component.py
@@ -5,7 +5,7 @@
import logging
import re
from pathlib import Path
from typing import List, Optional, Tuple, Union
from typing import Any, List, Optional, Tuple, Union

log = logging.getLogger(__name__)

@@ -41,6 +41,7 @@ def __init__(
remote_component (bool): Whether the module is to be treated as a
nf-core or local component
"""
self.component_type = component_type
self.component_name = component_name
self.repo_url = repo_url
self.component_dir = component_dir
@@ -170,65 +171,95 @@ def _get_included_components_in_chained_tests(self, main_nf_test: Union[Path, st

def get_inputs_from_main_nf(self) -> None:
"""Collect all inputs from the main.nf file."""
inputs: list[list[dict[str, dict[str, str]]]] = []
inputs: Any = [] # Can be 'list[list[dict[str, dict[str, str]]]]' or 'list[str]'
Contributor:

Suggested change:
- inputs: Any = []  # Can be 'list[list[dict[str, dict[str, str]]]]' or 'list[str]'
+ inputs: Union[List[List[Dict[str, Dict[str, str]]]], List[str]] = []

Member Author:
This wasn't working, because we append elements of one of the types later on :( I learnt about the overload decorator, but in our case it's not a function.
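
A minimal sketch (mine, not from this thread) of why the suggested `Union` annotation fails: mypy checks each method call against both members of the `Union`, so the module branch's appends are rejected by the `List[str]` member and the subworkflow branch's appends by the nested-list member:

```python
from typing import Dict, List, Union

inputs: Union[List[List[Dict[str, Dict[str, str]]]], List[str]] = []
inputs.append("ch_reads")      # mypy error: not valid for the nested-list member
inputs.append([{"meta": {}}])  # mypy error: not valid for the List[str] member
```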

with open(self.main_nf) as f:
data = f.read()
# get input values from main.nf after "input:", which can be formatted as tuple val(foo) path(bar) or val foo or val bar or path bar or path foo
# regex matches:
# val(foo)
# path(bar)
# val foo
# val bar
# path bar
# path foo
# don't match anything inside comments or after "output:"
if "input:" not in data:
log.debug(f"Could not find any inputs in {self.main_nf}")
return
input_data = data.split("input:")[1].split("output:")[0]
for line in input_data.split("\n"):
channel_elements: list[dict[str, dict[str, str]]] = []
regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
matches = re.finditer(regex, line)
for _, match in enumerate(matches, start=1):
input_val = None
if match.group(3):
input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
elif match.group(4):
input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
if input_val:
channel_elements.append({input_val: {}})
if len(channel_elements) > 0:
inputs.append(channel_elements)
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs
if self.component_type == "modules":
# get input values from main.nf after "input:", which can be formatted as tuple val(foo) path(bar) or val foo or val bar or path bar or path foo
# regex matches:
# val(foo)
# path(bar)
# val foo
# val bar
# path bar
# path foo
# don't match anything inside comments or after "output:"
if "input:" not in data:
log.debug(f"Could not find any inputs in {self.main_nf}")
return
input_data = data.split("input:")[1].split("output:")[0]
for line in input_data.split("\n"):
channel_elements: Any = []
regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
matches = re.finditer(regex, line)
for _, match in enumerate(matches, start=1):
input_val = None
if match.group(3):
input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
elif match.group(4):
input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
if input_val:
channel_elements.append({input_val: {}})
if len(channel_elements) > 0:
inputs.append(channel_elements)
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs
elif self.component_type == "subworkflows":
# get input values from main.nf after "take:"
if "take:" not in data:
log.debug(f"Could not find any inputs in {self.main_nf}")
return
# get all lines between "take" and "main" or "emit"
input_data = data.split("take:")[1].split("main:")[0].split("emit:")[0]
for line in input_data.split("\n"):
try:
inputs.append(line.split()[0])
except IndexError:
pass # Empty lines
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs
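
As a quick sanity check (my own snippet, not part of the diff), the input-parsing regex above behaves as follows on representative `main.nf` input lines:

```python
import re

regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
for line in [
    "tuple val(meta), path(reads)",
    "path fasta",
    'path(files, stageAs: "inputs/*")',
]:
    for m in re.finditer(regex, line):
        # group(3) is the parenthesised form, group(4) the bare form;
        # split(",") drops qualifiers such as `stageAs:`
        name = (m.group(3) or m.group(4)).split(",")[0]
        print(m.group(1), name)
# prints: val meta / path reads / path fasta / path files
```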

def get_outputs_from_main_nf(self):
outputs = []
with open(self.main_nf) as f:
data = f.read()
# get output values from main.nf after "output:". the names are always after "emit:"
if "output:" not in data:
log.debug(f"Could not find any outputs in {self.main_nf}")
return outputs
output_data = data.split("output:")[1].split("when:")[0]
regex_emit = r"emit:\s*([^)\s,]+)"
regex_elements = r"(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
for line in output_data.split("\n"):
match_emit = re.search(regex_emit, line)
matches_elements = re.finditer(regex_elements, line)
if not match_emit:
continue
output_channel = {match_emit.group(1): []}
for _, match_element in enumerate(matches_elements, start=1):
output_val = None
if match_element.group(3):
output_val = match_element.group(3)
elif match_element.group(4):
output_val = match_element.group(4)
if output_val:
output_val = output_val.strip("'").strip('"') # remove quotes
output_channel[match_emit.group(1)].append({output_val: {}})
outputs.append(output_channel)
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
self.outputs = outputs
if self.component_type == "modules":
# get output values from main.nf after "output:". the names are always after "emit:"
if "output:" not in data:
log.debug(f"Could not find any outputs in {self.main_nf}")
return outputs
output_data = data.split("output:")[1].split("when:")[0]
regex_emit = r"emit:\s*([^)\s,]+)"
regex_elements = r"(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
for line in output_data.split("\n"):
match_emit = re.search(regex_emit, line)
matches_elements = re.finditer(regex_elements, line)
if not match_emit:
continue
output_channel = {match_emit.group(1): []}
for _, match_element in enumerate(matches_elements, start=1):
output_val = None
if match_element.group(3):
output_val = match_element.group(3)
elif match_element.group(4):
output_val = match_element.group(4)
if output_val:
output_val = output_val.strip("'").strip('"') # remove quotes
output_channel[match_emit.group(1)].append({output_val: {}})
outputs.append(output_channel)
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
self.outputs = outputs
elif self.component_type == "subworkflows":
# get output values from main.nf after "emit:". Can be named outputs or not.
if "emit:" not in data:
log.debug(f"Could not find any outputs in {self.main_nf}")
return outputs
output_data = data.split("emit:")[1].split("}")[0]
for line in output_data.split("\n"):
try:
outputs.append(line.split("=")[0].split()[0])
except IndexError:
# Empty lines
pass
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
self.outputs = outputs
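
For illustration (again my own snippet), the emit regexes above turn a typical module output line into the nested channel structure stored in `self.outputs`:

```python
import re

line = 'tuple val(meta), path("*.bam"), emit: bam'
match_emit = re.search(r"emit:\s*([^)\s,]+)", line)
elements = re.finditer(r"(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))", line)
channel = {
    match_emit.group(1): [
        {(m.group(3) or m.group(4)).strip("'").strip('"'): {}} for m in elements
    ]
}
print(channel)  # {'bam': [{'meta': {}}, {'*.bam': {}}]}
```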
10 changes: 5 additions & 5 deletions nf_core/module-template/meta.yml
@@ -53,11 +53,6 @@ input:
## TODO nf-core: Add a description of all of the variables used as output
{% endif -%}
output:
- versions:
- "versions.yml":
type: file
description: File containing software versions
pattern: "versions.yml"
- {{ 'bam:' if not_empty_template else "output:" }}
#{% if has_meta -%} Only when we have meta
- meta:
@@ -81,6 +76,11 @@ output:
{% else -%}
- edam: ""
{%- endif %}
- versions:
- "versions.yml":
type: file
description: File containing software versions
pattern: "versions.yml"

authors:
- "{{ author }}"
17 changes: 17 additions & 0 deletions nf_core/modules/lint/meta_yml.py
@@ -95,6 +95,23 @@ def meta_yml(module_lint_object: ComponentLint, module: NFCoreComponent) -> None

# Confirm that all input and output channels are correctly specified
if valid_meta_yml:
# confirm that the name matches the process name in main.nf
if meta_yaml["name"].upper() == module.process_name:
module.passed.append(
(
"meta_name",
"Correct name specified in `meta.yml`.",
module.meta_yml,
)
)
else:
module.failed.append(
(
"meta_name",
f"Conflicting `process` name between meta.yml (`{meta_yaml['name']}`) and main.nf (`{module.process_name}`)",
module.meta_yml,
)
)
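
A worked example of this check (module name hypothetical): a module whose `main.nf` declares `process SAMTOOLS_SORT` passes only if `meta.yml` names the lowercase equivalent, since the comparison uppercases the YAML value:

```python
meta_yaml = {"name": "samtools_sort"}  # hypothetical meta.yml content
process_name = "SAMTOOLS_SORT"         # hypothetical name parsed from main.nf
assert meta_yaml["name"].upper() == process_name  # "meta_name" lint test passes
```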
# Check that inputs are specified in meta.yml
if len(module.inputs) > 0 and "input" not in meta_yaml:
module.failed.append(
4 changes: 2 additions & 2 deletions nf_core/pipeline-template/modules.json
@@ -8,12 +8,12 @@
{%- if fastqc %}
"fastqc": {
"branch": "master",
"git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
}{% endif %}{%- if multiqc %}{% if fastqc %},{% endif %}
"multiqc": {
"branch": "master",
"git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
}
{%- endif %}
5 changes: 4 additions & 1 deletion nf_core/pipeline-template/modules/nf-core/fastqc/main.nf

(Generated file; diff not rendered by default.)