diff --git a/docs/source/howto.rst b/docs/source/howto.rst index f570d21..b23b709 100644 --- a/docs/source/howto.rst +++ b/docs/source/howto.rst @@ -114,6 +114,91 @@ Any parent directories in the filepath, for example ``some/nested/path`` in the The output filename can be anything except for ``stdout``, ``stderr`` and ``status``, which are reserved filenames. +Running a shell command with folders as arguments +================================================= + +Certain commands might require the presence of a folder of files in the working directory. +Just like a file is modeled in AiiDA's provenance graph by a ``SinglefileData`` node, a folder is represented by a ``FolderData`` node. +The following example shows how a ``FolderData`` can be created to contain multiple files and how it can be passed to ``launch_shell_job`` using the ``nodes`` argument: + +.. code-block:: python + + import pathlib + import tempfile + from aiida.orm import FolderData + from aiida_shell import launch_shell_job + + # First create a ``FolderData`` node with some arbitrary files + with tempfile.TemporaryDirectory() as tmpdir: + dirpath = pathlib.Path(tmpdir) + (dirpath / 'file_a.txt').write_text('content a') + (dirpath / 'file_b.txt').write_text('content b') + folder_data = FolderData(tree=dirpath.absolute()) + + results, node = launch_shell_job( + 'ls', + nodes={ + 'directory': folder_data, + } + ) + print(results['stdout'].get_content()) + +which prints: + +.. code-block:: console + + _aiidasubmit.sh + file_a.txt + file_b.txt + _scheduler-stderr.txt + _scheduler-stdout.txt + stderr + stdout + +The contents of the ``folder_data`` node, the ``file_a.txt`` and ``file_b.txt`` files, were copied to the working directory. + +Note that by default, the contents of the ``FolderData`` are copied to the root of the working directory, as shown in the example above. 
+If the contents should be written to a directory inside the working directory, use the ``filenames`` argument, as is done for copying ``SinglefileData`` nodes. +Take for example the ``zip`` command that can create a zip archive from one or more files and folders. + +.. code-block:: python + + import pathlib + import tempfile + from aiida.orm import FolderData + from aiida_shell import launch_shell_job + + # First create a ``FolderData`` node with some arbitrary files + with tempfile.TemporaryDirectory() as tmpdir: + dirpath = pathlib.Path(tmpdir) + (dirpath / 'file_a.txt').write_text('content a') + (dirpath / 'file_b.txt').write_text('content b') + folder_data = FolderData(tree=dirpath.absolute()) + + results, node = launch_shell_job( + 'zip', + arguments='-r archive.zip {folder}', + outputs=['archive.zip'], + nodes={ + 'folder': folder_data, + }, + filenames={ + 'folder': 'directory' + } + ) + +In this example, the contents of the ``folder_data`` node were copied to the ``directory`` folder in the working directory. +The ``results`` dictionary contains the ``archive_zip`` output which is a ``SinglefileData`` node containing the zip archive. +It can be unzipped as follows: ``verdi node repo cat <IDENTIFIER> | unzip``, where ``<IDENTIFIER>`` should be replaced with the pk or UUID of the ``archive_zip`` node. +The original files ``file_a.txt`` and ``file_b.txt`` are now written to the current working directory. + +.. note:: + + A ``FolderData`` node that is specified in the ``nodes`` input is not required to have a corresponding placeholder in the ``arguments``. + Just as with ``SinglefileData`` input nodes, if there is no corresponding placeholder, the contents of the folder are simply written to the working directory where the shell command is executed. + This is useful for commands that expect a folder to be present in the working directory but whose name is not explicitly defined through a command line argument. 
+ + Passing other ``Data`` types as input ===================================== diff --git a/src/aiida_shell/calculations/shell.py b/src/aiida_shell/calculations/shell.py index d80f6f4..4b97479 100644 --- a/src/aiida_shell/calculations/shell.py +++ b/src/aiida_shell/calculations/shell.py @@ -9,7 +9,7 @@ from aiida.common.datastructures import CalcInfo, CodeInfo from aiida.common.folders import Folder from aiida.engine import CalcJob, CalcJobProcessSpec -from aiida.orm import Data, Dict, List, SinglefileData, to_aiida_type +from aiida.orm import Data, Dict, FolderData, List, SinglefileData, to_aiida_type from aiida_shell.data import PickledData @@ -133,7 +133,7 @@ def validate_nodes(cls, value: t.Mapping[str, Data], _) -> str | None: """Validate the ``nodes`` input.""" for key, node in value.items(): - if isinstance(node, SinglefileData): + if isinstance(node, (FolderData, SinglefileData)): continue try: @@ -289,6 +289,9 @@ def process_arguments_and_nodes( if isinstance(node, SinglefileData): filename = self.write_single_file_data(dirpath, node, placeholder, filenames) argument_interpolated = argument.format(**{placeholder: filename}) + elif isinstance(node, FolderData): + filename = self.write_folder_data(dirpath, node, placeholder, filenames) + argument_interpolated = argument.format(**{placeholder: filename}) else: argument_interpolated = argument.format(**{placeholder: str(node.value)}) @@ -296,8 +299,13 @@ def process_arguments_and_nodes( processed_arguments.append(argument_interpolated) for key, node in nodes.items(): - if key not in processed_nodes and isinstance(node, SinglefileData): + if key in processed_nodes: + continue + + if isinstance(node, SinglefileData): self.write_single_file_data(dirpath, node, key, filenames) + elif isinstance(node, FolderData): + self.write_folder_data(dirpath, node, key, filenames) return processed_arguments @@ -321,3 +329,25 @@ def write_single_file_data(dirpath: pathlib.Path, node: SinglefileData, key: str 
filepath.write_bytes(handle.read()) return filename + + @staticmethod + def write_folder_data(dirpath: pathlib.Path, node: FolderData, key: str, filenames: dict[str, str]) -> str: + """Write the content of a ``FolderData`` node to ``dirpath``. + + :param dirpath: A temporary folder on the local file system. + :param node: The node whose content to write. + :param key: The relative filename to use. + :param filenames: Mapping that can provide explicit filenames for the given key. + :returns: The relative filename used to write the content to ``dirpath``. + """ + if key in filenames: + filename = filenames[key] + filepath = dirpath / filename + else: + filename = key + filepath = dirpath + + filepath.parent.mkdir(parents=True, exist_ok=True) + node.base.repository.copy_tree(filepath) + + return filename diff --git a/tests/calculations/test_shell.py b/tests/calculations/test_shell.py index 93f901f..f60074b 100644 --- a/tests/calculations/test_shell.py +++ b/tests/calculations/test_shell.py @@ -5,7 +5,7 @@ import pathlib from aiida.common.datastructures import CodeInfo -from aiida.orm import Data, Float, Int, List, SinglefileData, Str +from aiida.orm import Data, Float, FolderData, Int, List, SinglefileData, Str import pytest from aiida_shell.calculations.shell import ShellJob @@ -44,6 +44,42 @@ def test_nodes_single_file_data(generate_calc_job, generate_code): assert sorted([p.name for p in dirpath.iterdir()]) == ['xa', 'xb'] +def test_nodes_folder_data(generate_calc_job, generate_code, tmp_path): + """Test the ``nodes`` input with ``FolderData`` nodes .""" + (tmp_path / 'file_a.txt').write_text('content a') + (tmp_path / 'file_b.txt').write_text('content b') + + folder_flat = FolderData(tree=tmp_path.absolute()) + folder_nested = FolderData() + folder_nested.base.repository.put_object_from_tree(tmp_path.absolute(), 'dir') + inputs = { + 'code': generate_code(), + 'arguments': ['{nested}', '{nested_explicit}'], + 'nodes': { + 'flat': folder_flat, + 'nested': 
folder_nested, + 'flat_explicit': folder_flat, + 'nested_explicit': folder_nested, + }, + 'filenames': { + 'flat_explicit': 'sub', + 'nested_explicit': 'sub' + } + } + dirpath, calc_info = generate_calc_job('core.shell', inputs) + code_info = calc_info.codes_info[0] + + assert code_info.cmdline_params == ['nested', 'sub'] + assert code_info.stdout_name == ShellJob.FILENAME_STDOUT + assert calc_info.retrieve_temporary_list == ShellJob.DEFAULT_RETRIEVED_TEMPORARY + assert sorted([p.name for p in dirpath.iterdir()]) == ['dir', 'file_a.txt', 'file_b.txt', 'sub'] + assert sorted([p.name for p in (dirpath / 'dir').iterdir()]) == ['file_a.txt', 'file_b.txt'] + assert sorted([p.name for p in (dirpath / 'sub').iterdir()]) == ['dir', 'file_a.txt', 'file_b.txt'] + assert sorted([p.name for p in (dirpath / 'sub' / 'dir').iterdir()]) == ['file_a.txt', 'file_b.txt'] + assert (dirpath / 'file_a.txt').read_text() == 'content a' + assert (dirpath / 'file_b.txt').read_text() == 'content b' + + def test_nodes_base_types(generate_calc_job, generate_code): """Test the ``nodes`` input with ``BaseType`` nodes .""" inputs = {