Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Release #851

Merged
merged 5 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@ jobs:
wget https://repo1.maven.org/maven2/org/mariadb/jdbc/mariadb-java-client/3.0.3/mariadb-java-client-3.0.3.jar
popd
# liquibase
# Temporarily disable due to removed liquibase 4.22.0, re-enable when liquibase release again
# VERSION=$(curl -Ls -o /dev/null -w %{url_effective} https://github.com/liquibase/liquibase/releases/latest | grep -o "v.*" | sed s/'>.*'//g | sed s/'v'//g | sed 's/"//g')
VERSION=4.21.1
VERSION=4.28.0
curl -L https://github.com/liquibase/liquibase/releases/download/v${VERSION}/liquibase-${VERSION}.zip --output liquibase-${VERSION}.zip
unzip -o -d liquibase liquibase-${VERSION}.zip
echo "$(pwd)/liquibase" >> $GITHUB_PATH
Expand Down Expand Up @@ -77,20 +75,29 @@ jobs:
files: ./coverage.xml
token: ${{ secrets.CODECOV_TOKEN }}


- name: "build web front-end"
  # id makes this step's outputs (web_rc) addressable from later steps
  id: buildweb
  run: |
    set -eo pipefail
    pushd web
    # `npm ci` installs the exact dependency tree pinned in package-lock.json
    npm ci
    # Capture the build's exit code explicitly: under `set -e` a bare
    # `npm run build` failure would abort the script before `rc=$?` ran,
    # so web_rc could only ever record 0.
    rc=0
    npm run build || rc=$?

    echo "web_rc=$rc" >> $GITHUB_OUTPUT
    # eventually run web front-end tests
    popd

- name: Fail if unit tests are not passing
  if: ${{ steps.runtests.outputs.rc != 0 }}
  uses: actions/github-script@v6
  with:
    script: |
      core.setFailed('Unit tests failed with rc = ${{ steps.runtests.outputs.rc }}')

- name: Fail if web build fails
  # The condition must test the web build's exit code (web_rc), not the
  # unit-test rc, otherwise this step fires on the wrong failure.
  # NOTE(review): web_rc is written by the "build web front-end" step;
  # confirm that step has an `id:` and that this `steps.<id>` reference
  # resolves to it.
  if: ${{ steps.runtests.outputs.web_rc != 0 }}
  uses: actions/github-script@v6
  with:
    script: |
      core.setFailed('Web failed to build with rc = ${{ steps.runtests.outputs.web_rc }}')
29 changes: 29 additions & 0 deletions db/project.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1420,4 +1420,33 @@
remarks="Migration of external IDs to separate table"
/>
</changeSet>
<changeSet id="2024-06-25_nested-samples" author="michael.franklin">
    <!-- MariaDB: permit ALTERs on a system-versioned table by also rewriting its history -->
    <sql>SET @@system_versioning_alter_history = 1;</sql>
    <addColumn tableName="sample">
        <!-- top-most ancestor of this sample in a nested-sample tree (NULL for top-level samples) -->
        <column
            name="sample_root_id"
            type="INT"
        >
            <constraints
                nullable="true"
                foreignKeyName="FK_SAMPLE_ROOT_SAMPLE"
                references="sample(id)"
                deleteCascade="true"
            />
        </column>
        <!-- immediate parent sample (NULL for top-level samples) -->
        <column
            name="sample_parent_id"
            type="INT"
        >
            <constraints
                nullable="true"
                foreignKeyName="FK_SAMPLE_PARENT_SAMPLE"
                references="sample(id)"
                deleteCascade="true"
            />
        </column>

    </addColumn>
</changeSet>
</databaseChangeLog>
11 changes: 10 additions & 1 deletion db/python/filters/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class GenericFilter(SMBase, Generic[T]):
contains: T | None = None
icontains: T | None = None
startswith: T | None = None
isnull: bool | None = None

def __repr__(self):
keys = [
Expand All @@ -68,6 +69,7 @@ def __repr__(self):
'contains',
'icontains',
'startswith',
'isnull',
]
inner_values = ', '.join(
f'{k}={getattr(self, k)!r}' for k in keys if getattr(self, k) is not None
Expand All @@ -90,6 +92,7 @@ def get_hashable_value(self):
self.contains,
self.icontains,
self.startswith,
self.isnull,
)
)

Expand Down Expand Up @@ -196,7 +199,12 @@ def to_sql(
search_term = escape_like_term(str(self.startswith))
k = self.generate_field_name(column + '_startswith')
conditionals.append(f'{column} LIKE :{k}')
values[k] = self._sql_value_prep(search_term + '%')
values[k] = self._sql_value_prep(escape_like_term(search_term) + '%')
if self.isnull is not None:
if self.isnull:
conditionals.append(f'{column} IS NULL')
else:
conditionals.append(f'{column} IS NOT NULL')

return ' AND '.join(conditionals), values

Expand Down Expand Up @@ -228,6 +236,7 @@ def transform(self, func: Callable[[T], X]) -> 'GenericFilter[X]':
contains=func(self.contains) if self.contains else None,
icontains=func(self.icontains) if self.icontains else None,
startswith=func(self.startswith) if self.startswith else None,
isnull=self.isnull,
)


Expand Down
5 changes: 5 additions & 0 deletions db/python/filters/participant.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ class ParticipantSampleFilter(GenericFilterModel):
external_id: GenericFilter[str] | None = None
meta: GenericMetaFilter | None = None

sample_root_id: GenericFilter[int] | None = None
sample_parent_id: GenericFilter[int] | None = None

@dataclasses.dataclass(kw_only=True)
class ParticipantSequencingGroupFilter(GenericFilterModel):
"""
Expand Down Expand Up @@ -88,6 +91,8 @@ def get_sample_filter(self) -> SampleFilter:
external_id=self.sample.external_id if self.sample else None,
type=self.sample.type if self.sample else None,
meta=self.sample.meta if self.sample else None,
sample_root_id=self.sample.sample_root_id if self.sample else None,
sample_parent_id=self.sample.sample_parent_id if self.sample else None,
project=self.project,
participant_id=self.id,
sequencing_group=(
Expand Down
3 changes: 3 additions & 0 deletions db/python/filters/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ class SampleAssayFilter(GenericFilterModel):
external_id: GenericFilter[str] | None = None
active: GenericFilter[bool] | None = None

sample_root_id: GenericFilter[int] | None = None
sample_parent_id: GenericFilter[int] | None = None

# nested
sequencing_group: SampleSequencingGroupFilter | None = None
assay: SampleAssayFilter | None = None
Expand Down
19 changes: 4 additions & 15 deletions db/python/layers/ourdna/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,6 @@ def get_property(self, property_name: str) -> Any:
property_name.replace('-', '_')
)

def get_time_difference_in_seconds(
self, start_property: str, end_property: str
) -> int | None:
"""Calculate time difference in seconds between two meta properties."""
start_time = self.get_property(start_property)
end_time = self.get_property(end_property)
if start_time is None or end_time is None:
return None
time_taken = datetime.strptime(
end_time, '%Y-%m-%d %H:%M:%S'
) - datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')
return int(time_taken.total_seconds())

@staticmethod
def try_parse_datetime(d: str) -> datetime | None:
"""
Expand All @@ -53,6 +40,8 @@ def try_parse_datetime(d: str) -> datetime | None:
Returns:
datetime | None: A datetime object if parsing is successful, otherwise None.
"""
if not d:
return None
try:
return datetime.strptime(d, '%Y-%m-%d %H:%M:%S')
except TypeError as e:
Expand Down Expand Up @@ -169,9 +158,9 @@ async def query(
s, participants = await asyncio.gather(
self.sample_layer.query(
filter_=SampleFilter(
# Added `ebld` filtering temporarily to prevent duplicate rows
project=GenericFilter(eq=project_id),
type=GenericFilter(eq='ebld'),
# Get the top level samples only
sample_root_id=GenericFilter(isnull=True),
)
),
self.participant_layer.get_participants(project=project_id),
Expand Down
156 changes: 120 additions & 36 deletions db/python/layers/sample.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import datetime
from typing import Any
from typing import Any, NamedTuple

from api.utils import group_by
from db.python.filters import GenericFilter
Expand Down Expand Up @@ -218,7 +218,9 @@ async def get_samples_create_date(
async def upsert_sample(
self,
sample: SampleUpsertInternal,
project: ProjectId = None,
sample_parent_id: int | None = None,
sample_root_id: int | None = None,
project: ProjectId | None = None,
process_sequencing_groups: bool = True,
process_assays: bool = True,
open_transaction: bool = True,
Expand All @@ -227,44 +229,50 @@ async def upsert_sample(
with_function = (
self.connection.connection.transaction if open_transaction else NoOpAenter
)

# safely ignore nested samples here
async with with_function():
if not sample.id:
sample.id = await self.st.insert_sample(
external_ids=sample.external_ids,
sample_type=sample.type,
active=True,
meta=sample.meta,
participant_id=sample.participant_id,
project=project,
)
else:
# Otherwise update
await self.st.update_sample(
id_=sample.id, # type: ignore
external_ids=sample.external_ids,
meta=sample.meta,
participant_id=sample.participant_id,
type_=sample.type,
active=sample.active,
)
for r in self.unwrap_nested_samples([sample]):
s = r.sample
if not s.id:
s.id = await self.st.insert_sample(
external_ids=s.external_ids,
sample_type=s.type,
active=True,
meta=s.meta,
participant_id=s.participant_id,
project=project,
sample_parent_id=r.parent.id if r.parent else sample_parent_id,
sample_root_id=r.root.id if r.root else sample_root_id,
)
else:
# Otherwise update
await self.st.update_sample(
id_=s.id, # type: ignore
external_ids=s.external_ids,
meta=s.meta,
participant_id=s.participant_id,
type_=s.type,
active=s.active,
sample_parent_id=sample_parent_id,
sample_root_id=sample_root_id,
)

if sample.sequencing_groups:
sglayer = SequencingGroupLayer(self.connection)
for seqg in sample.sequencing_groups:
seqg.sample_id = sample.id
if sample.sequencing_groups:
sglayer = SequencingGroupLayer(self.connection)
for seqg in sample.sequencing_groups:
seqg.sample_id = sample.id

if process_sequencing_groups:
await sglayer.upsert_sequencing_groups(sample.sequencing_groups)
if process_sequencing_groups:
await sglayer.upsert_sequencing_groups(sample.sequencing_groups)

if sample.non_sequencing_assays:
alayer = AssayLayer(self.connection)
for assay in sample.non_sequencing_assays:
assay.sample_id = sample.id
if process_assays:
await alayer.upsert_assays(
sample.non_sequencing_assays, open_transaction=False
)
if sample.non_sequencing_assays:
alayer = AssayLayer(self.connection)
for assay in sample.non_sequencing_assays:
assay.sample_id = sample.id
if process_assays:
await alayer.upsert_assays(
sample.non_sequencing_assays, open_transaction=False
)

return sample

Expand Down Expand Up @@ -319,6 +327,82 @@ async def upsert_samples(

return samples

class UnwrappedSample(NamedTuple):
    """
    One entry in the flattened, insert-ordered listing of a nested-sample
    tree produced by ``unwrap_nested_samples``.

    ``root`` and ``parent`` are the top-most ancestor and the immediate
    parent of ``sample`` (both None for samples at the top level of the
    input), kept so their DB ids can be attached when inserting.
    """

    # top-most ancestor in the nested tree (None for top-level samples)
    root: SampleUpsertInternal | None
    # immediate parent (None for top-level samples)
    parent: SampleUpsertInternal | None
    # the sample itself
    sample: SampleUpsertInternal

class SampleUnwrapMaxDepthError(Exception):
    """Raised when a nested-sample tree is deeper than the caller's max_depth allows."""

@staticmethod
def unwrap_nested_samples(
samples: list[SampleUpsertInternal], max_depth=10
) -> list[UnwrappedSample]:
"""
We only insert one by one, so we don't need to do anything too crazy, just pull
out the insert order, keeping reference to the root, and parent.

Just keep a soft limit on the depth, as we don't want to go too deep.

NB: Opting for a non-recursive approach here, as I'm a bit afraid of recursive
Python after a weird Hail Batch thing, and sounded like a nightmare to debug
"""

retval: list[SampleLayer.UnwrappedSample] = []

seen_samples = {id(s) for s in samples}

rounds: list[
list[
tuple[
SampleUpsertInternal | None,
SampleUpsertInternal | None,
list[SampleUpsertInternal],
]
]
] = [[(None, None, samples)]]

round_idx = 0
while round_idx < len(rounds):
prev_round = rounds[round_idx]
new_round = []
round_idx += 1
for root, parent, nested_samples in prev_round:

for sample in nested_samples:
retval.append(
SampleLayer.UnwrappedSample(
root=root, parent=parent, sample=sample
)
)
if not sample.nested_samples:
continue

# do the seen check
for s in sample.nested_samples:
if id(s) in seen_samples:
raise ValueError(
f'Sample sample was seen in the list ({s})'
)
seen_samples.add(id(s))
new_round.append((root or sample, sample, sample.nested_samples))

if new_round:
if round_idx >= max_depth:
parents = ', '.join(str(s) for _, s, _ in new_round)
raise SampleLayer.SampleUnwrapMaxDepthError(
f'Exceeded max depth of {max_depth} for nested samples. '
f'Parents: {parents}'
)
rounds.append(new_round)

return retval

async def merge_samples(
self,
id_keep: int,
Expand Down
2 changes: 2 additions & 0 deletions db/python/layers/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,8 @@ def assemble_nested_participants_from(
external_ids=sample.external_ids,
type=sample.type,
meta=sample.meta,
sample_root_id=sample.sample_root_id,
sample_parent_id=sample.sample_parent_id,
created_date=screate,
sequencing_groups=nested_sgs,
non_sequencing_assays=[],
Expand Down
2 changes: 2 additions & 0 deletions db/python/tables/participant.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ async def _construct_participant_query(
'id': 's.id',
'type': 's.type',
'meta': 's.meta',
'sample_root_id': 's.sample_root_id',
'sample_parent_id': 's.sample_parent_id',
},
exclude=['external_id'],
)
Expand Down
Loading