Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to lark for parsing nd file contents #5

Merged
merged 3 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
python-version: ['3.10', '3.11', '3.12']

steps:
- uses: actions/checkout@v3
Expand Down
12 changes: 8 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name = "metamorph-mda-parser"
dynamic = ["version"]
description = 'Light-weight parsing of Metamorph/VisiView .nd files.'
readme = "README.md"
requires-python = ">=3.8"
requires-python = ">=3.10"
license = "BSD-3-Clause"
keywords = []
authors = [
Expand All @@ -16,16 +16,16 @@ authors = [
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"lark",
"pandas",
"pydantic",
]

[project.urls]
Expand Down Expand Up @@ -54,7 +54,7 @@ cov = [
]

[[tool.hatch.envs.all.matrix]]
python = ["3.8", "3.9", "3.10", "3.11", "3.12"]
python = ["3.10", "3.11", "3.12"]

[tool.hatch.envs.types]
dependencies = [
Expand All @@ -76,8 +76,12 @@ metamorph_mda_parser = ["src/metamorph_mda_parser", "*/metamorph-mda-parser/src/
tests = ["tests", "*/metamorph-mda-parser/tests"]

[tool.coverage.report]
show_missing = true
exclude_lines = [
"no cov",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
]

[tool.ruff.lint.flake8-type-checking]
runtime-evaluated-base-classes = ["pydantic.BaseModel"]
36 changes: 36 additions & 0 deletions src/metamorph_mda_parser/lark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from lark import Lark, Transformer


class NDInfoTransformer(Transformer):
    """Turn the parsed lines of an .nd file into a single dictionary.

    Numbered entries (``WaveName<n>``, ``Stage<n>``, ``WaveDoZ<n>``) are
    collected into lists in the order they are encountered; every other
    entry becomes a plain ``key: value`` item of the result.

    The instance accumulates state while transforming, so a fresh instance
    must be used for every parse.
    """

    def __init__(self):
        # Initialise the lark base class explicitly instead of relying on
        # its class-level defaults.
        super().__init__()
        self.wave_names = []
        self.wave_do_z = []
        self.stage_positions = []

    def start(self, items):
        """Assemble the final dictionary from the per-line results.

        ``items`` holds what ``line`` returned for each entry: either a
        ``(key, value)`` pair or ``None`` for entries that were collected
        into one of the lists.
        """
        result = dict(i for i in items if i is not None)
        result["WaveNames"] = self.wave_names
        result["WaveDoZ"] = self.wave_do_z
        result["StagePositions"] = self.stage_positions
        return result

    def line(self, key_value):
        """Handle one ``key, value`` entry.

        Returns ``None`` for numbered entries (they are appended to the
        matching list instead) and the ``(key, value)`` pair otherwise.
        """
        key, value = key_value
        # The prefixes are mutually exclusive, so the order of this table
        # does not influence the outcome.
        collected = (
            ("WaveName", self.wave_names),
            ("Stage", self.stage_positions),
            ("WaveDoZ", self.wave_do_z),
        )
        for prefix, bucket in collected:
            if key.startswith(prefix):
                bucket.append(value)
                return None  # Collected entries are added again in start()
        return (key, value)

    def boolean_value(self, b):
        """Convert the matched token to ``bool``: ``True`` only for ``TRUE``."""
        return b[0].value == "TRUE"


def parse(content):
    """Parse the text content of an .nd file and return the transformed result.

    The grammar is read from ``nd_grammar.lark`` located relative to this
    module, and a new ``NDInfoTransformer`` is created for every call.
    """
    transformer = NDInfoTransformer()
    nd_parser = Lark.open(
        "nd_grammar.lark",
        rel_to=__file__,
        parser="lalr",
        transformer=transformer,
    )
    return nd_parser.parse(content)
105 changes: 32 additions & 73 deletions src/metamorph_mda_parser/nd.py
Original file line number Diff line number Diff line change
@@ -1,89 +1,48 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Literal

if TYPE_CHECKING:
from pathlib import Path
from pathlib import Path
from typing import Literal

import pandas as pd
from pydantic import AliasGenerator, BaseModel, ConfigDict
from pydantic.alias_generators import to_pascal

from metamorph_mda_parser.lark import parse


class NdInfo:
class NdInfo(BaseModel):
model_config = ConfigDict(
alias_generator=AliasGenerator(
validation_alias=to_pascal,
),
)

path: Path
name: str
version: Literal["1.0", "2.0"]
description: str
do_timelapse: bool
do_stage: bool
do_wave: bool
do_z: bool
stage_positions: list[str]
wave_names: list[str]
wave_do_z: list[bool]
n_timepoints: int
n_z_steps: int
z_step_size: float
do_z_series: bool
stage_positions: list[str] = []
wave_names: list[str] = []
wave_do_z: list[bool] = []
n_stage_positions: int = 1
n_time_points: int = 1
n_z_steps: int = 1
z_step_size: float | None = None
wave_in_file_name: bool

def __init__(self, path: Path):
self.path = path
self.name = path.stem
self._parse_nd()

def _parse_nd(self) -> None:
with open(self.path) as nd:
# Version
self.version = self._parse_line(nd.readline(), "NDInfoFile", self._extract_version)
self.description = self._parse_line(nd.readline(), "Description", str)
self.start_time = self._parse_line(nd.readline(), "StartTime1", str)
# Time lapse
self.do_timelapse = self._parse_line(nd.readline(), "DoTimelapse", self._parse_bool)
if self.do_timelapse:
self.n_timepoints = self._parse_line(nd.readline(), "NTimePoints", int)

# Stage positions
self.do_stage = self._parse_line(nd.readline(), "DoStage", self._parse_bool)
if self.do_stage:
n_stage_positions = self._parse_line(nd.readline(), "NStagePositions", int)
self.stage_positions = []
for s in range(n_stage_positions):
self.stage_positions.append(self._parse_line(nd.readline(), f"Stage{s+1}", str))

# Wavelengths
self.do_wave = self._parse_line(nd.readline(), "DoWave", self._parse_bool)
if self.do_wave:
n_wavelengths = self._parse_line(nd.readline(), "NWavelengths", int)
self.wave_names = []
self.wave_do_z = []
for w in range(n_wavelengths):
self.wave_names.append(self._parse_line(nd.readline(), f"WaveName{w+1}", str))
self.wave_do_z.append(self._parse_line(nd.readline(), f"WaveDoZ{w+1}", self._parse_bool))

# Z steps
self.do_z = self._parse_line(nd.readline(), "DoZSeries", self._parse_bool)
self.n_z_steps = self._parse_line(nd.readline(), "NZSteps", int)
self.z_step_size = self._parse_line(nd.readline(), "ZStepSize", float)

self.wave_in_file_name = self._parse_line(nd.readline(), "WaveInFileName", self._parse_bool)

# End of file
last_line = nd.readline()
if last_line.strip(' "\n') != "EndFile":
message = f"Expected end of file, got: {last_line}"
raise ValueError(message)

def _parse_line(self, line: str, key: str, value_function: Callable):
tokens = line.split(",")
if tokens[0].strip(' "') != key:
message = f"Invalid nd file contents.\n\texpected: {key}\n\tgot: {line}"
raise ValueError(message)
return value_function(tokens[1].strip(' "\n'))

def _extract_version(self, value: str) -> str:
return value[8:]

def _parse_bool(self, value: str) -> bool:
return value.lower() == "true"
@classmethod
def from_path(cls, path: Path) -> "NdInfo":
    """Create an instance from the .nd file at ``path``.

    The file content is parsed with ``parse`` and the resulting dictionary
    is extended with the file path, the file stem (as name) and a version
    before being passed to the model constructor.

    Being a classmethod, this also returns the right type when called on
    a subclass.
    """
    with open(path) as f:
        content = f.read()
    result = parse(content)
    result["Path"] = path
    result["Name"] = path.stem
    # HACK: the version is hard-coded here instead of being derived from
    # the "NDInfoFile" entry of the parsed file.
    result["Version"] = "1.0"
    return cls(**result)

def _wavelengths(self):
for i, w in enumerate(self.wave_names):
Expand All @@ -92,7 +51,7 @@ def _wavelengths(self):
i,
w,
f"_w{i+1}{w}" if self.wave_in_file_name else "",
self.wave_do_z[i],
self.wave_do_z[i] if self.wave_do_z else False,
)

def _stage_positions(self):
Expand All @@ -102,7 +61,7 @@ def _stage_positions(self):

def _timepoints(self):
if self.do_timelapse:
for t in range(self.n_timepoints):
for t in range(self.n_time_points):
yield t, f"_t{t+1}"

def _get_path_channel_position_time(self):
Expand Down
40 changes: 40 additions & 0 deletions src/metamorph_mda_parser/nd_grammar.lark
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Grammar for the contents of Metamorph/VisiView .nd files.
//
// A file is a sequence of entries followed by a quoted "EndFile" marker.
// Each entry consists of a quoted key, a comma and a value whose form
// depends on the key. Names starting with an underscore are inlined
// (rules) or filtered out (terminals) by lark, so each `line` only keeps
// its key and its value.
start: line+ "\"EndFile\""
line: _QUOTE _special_key _QUOTE "," _special_value
| _QUOTE _string_key _QUOTE "," _QUOTE _string_value _QUOTE
| _QUOTE _boolean_key _QUOTE "," boolean_value
| _QUOTE _integer_key _QUOTE "," _integer_value
| _QUOTE _float_key _QUOTE "," _float_value

// Keys whose value is unquoted free text (`.+` does not cross line breaks).
_special_key: /NDInfoFile/
| /Description/
| /StartTime[0-9]+/
_special_value: /.+/

// Numbered keys whose value is a quoted string.
_string_key: /WaveName[0-9]+/
| /Stage[0-9]+/
_string_value: /[^"]+/

// Keys whose value is the literal TRUE or FALSE.
_boolean_key: /DoTimelapse/
| /DoStage/
| /DoWave/
| /DoZSeries/
| /WaveInFileName/
| /WaveDoZ[0-9]+/
boolean_value: BOOLEAN

// Keys whose value is an integer count.
_integer_key: /NWavelengths/
| /NStagePositions/
| /NTimePoints/
| /NZSteps/
_integer_value: INT

// The step size may be written with or without a fractional part.
_float_key: /ZStepSize/
_float_value: DECIMAL | INT

// Whitespace between tokens is skipped.
%import common.INT
%import common.DECIMAL
%import common.WS
%ignore WS

BOOLEAN: "TRUE" | "FALSE"
_QUOTE: "\""
14 changes: 14 additions & 0 deletions tests/resources/sample_4ch_1pos_1z.nd
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"NDInfoFile", Version 1.0
"Description", File recreated from images.
"StartTime1", 20240603 12:25:48.610
"DoTimelapse", FALSE
"DoStage", FALSE
"DoWave", TRUE
"NWavelengths", 4
"WaveName1", "confDAPI"
"WaveName2", "confGFP"
"WaveName3", "confmCherry"
"WaveName4", "confCy5"
"DoZSeries", FALSE
"WaveInFileName", TRUE
"EndFile"
38 changes: 33 additions & 5 deletions tests/test_nd.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,13 @@ def sample_2ch_75pos_361t():
return Path("tests/resources/sample_2ch_75pos_361t.nd")


@pytest.fixture
def sample_4ch_1pos_1z():
    """Path of the sample .nd file with 4 wavelengths, no stage positions and no z-series."""
    nd_file = Path("tests/resources/sample_4ch_1pos_1z.nd")
    return nd_file


def test_sample_4ch_4pos(sample_4ch_4pos):
nd_info = NdInfo(sample_4ch_4pos)
nd_info = NdInfo.from_path(sample_4ch_4pos)

assert nd_info.version == "1.0"
assert nd_info.description == "File recreated from images."
Expand All @@ -25,7 +30,7 @@ def test_sample_4ch_4pos(sample_4ch_4pos):
assert len(nd_info.stage_positions) == 4
assert nd_info.do_wave
assert len(nd_info.wave_names) == 4
assert nd_info.do_z
assert nd_info.do_z_series
assert nd_info.n_z_steps == 42
assert nd_info.z_step_size == 3.0
assert nd_info.wave_in_file_name
Expand All @@ -47,17 +52,17 @@ def test_sample_4ch_4pos(sample_4ch_4pos):


def test_sample_2ch_75pos_361t(sample_2ch_75pos_361t):
nd_info = NdInfo(sample_2ch_75pos_361t)
nd_info = NdInfo.from_path(sample_2ch_75pos_361t)

assert nd_info.version == "1.0"
assert nd_info.description == "File recreated from images."
assert nd_info.do_timelapse
assert nd_info.n_timepoints == 361
assert nd_info.n_time_points == 361
assert nd_info.do_stage
assert len(nd_info.stage_positions) == 75
assert nd_info.do_wave
assert len(nd_info.wave_names) == 2
assert nd_info.do_z
assert nd_info.do_z_series
assert nd_info.n_z_steps == 25
assert nd_info.z_step_size == 2.0
assert nd_info.wave_in_file_name
Expand All @@ -66,3 +71,26 @@ def test_sample_2ch_75pos_361t(sample_2ch_75pos_361t):
files = nd_info.get_files()

assert len(files) == 54150


def test_sample_4ch_1pos_1z(sample_4ch_1pos_1z):
    """Check parsing of a file that omits the timelapse, stage and z-series details."""
    info = NdInfo.from_path(sample_4ch_1pos_1z)

    # Header entries
    assert info.version == "1.0"
    assert info.description == "File recreated from images."
    # Entries absent from the file fall back to the model defaults
    assert not info.do_timelapse
    assert info.n_time_points == 1
    assert not info.do_stage
    assert len(info.stage_positions) == 0
    # Wavelength entries
    assert info.do_wave
    assert len(info.wave_names) == 4
    # Z-series entries
    assert not info.do_z_series
    assert info.n_z_steps == 1
    assert info.z_step_size is None
    assert info.wave_in_file_name
    assert info.wave_names == ["confDAPI", "confGFP", "confmCherry", "confCy5"]

    file_table = info.get_files()

    assert len(file_table) == 4
    assert all(p.suffix == ".tif" for p in file_table["path"])
Loading