Skip to content

Commit

Permalink
Fix nullable reference
Browse files Browse the repository at this point in the history
  • Loading branch information
illusional committed Oct 21, 2023
1 parent a52a247 commit d72d605
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 9 deletions.
2 changes: 1 addition & 1 deletion db/project.xml
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,7 @@
type="INT"
>
<constraints
nullable="false"
nullable="true"
foreignKeyName="FK_PROJECT_WRITE_GROUP_GROUP_ID"
references="group(id)" />
</column>
Expand Down
56 changes: 48 additions & 8 deletions db/python/tables/analysis.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# pylint: disable=too-many-instance-attributes
import dataclasses
from collections import defaultdict
from datetime import date, datetime
from datetime import date, datetime, timedelta
from typing import Any, Dict, List, Optional, Set, Tuple

from db.python.connect import DbBase, NotFoundError
Expand Down Expand Up @@ -552,10 +552,24 @@ async def get_seqr_stats_by_sequencing_type(
# endregion STATS

async def get_sg_add_to_project_es_index(
self, sg_ids: list[int]
) -> dict[int, date]:
self, sg_ids: list[int], min_date: date
) -> list[tuple[date, set[int]]]:
"""
Get all the sequencing groups that should be added to seqr joint calls
We can have multiple joint-calls and es-indices on one day, and
es-indices are cumulative, and only deal with a single project.
ORIGINAL ATTEMPT:
Keep track of which dataset an es-index belongs to, and then on
a second pass evaluate the total samples for all DAY changes.
We add a *SPECIAL CASE* if the project-name is seqr, then that
sets the samples for the whole day to cover the joint-calls.
This unfortunately fails because the samples reported in some
es-indices are very small, which blows out the numbers.
CURRENT IMPLEMENTATION:
We just take the cumulative set of samples seen over all time, so we
CANNOT tell if a sample is removed once it's in an ES index
(though I think this is rare).
"""
_query = """
SELECT
Expand All @@ -570,7 +584,33 @@ async def get_sg_add_to_project_es_index(
GROUP BY a_sg.sequencing_group_id
"""

rows = await self.connection.fetch_all(
_query, {'sg_ids': sg_ids}
)
return {r['sg_id']: r['timestamp_completed'].date() for r in rows}
relevant_analyses = await self.connection.fetch_all(_query, {'sg_ids': sg_ids})

day_project_delta_sample_map: dict[date, set[str]] = defaultdict(set)
for analysis in relevant_analyses:
# Using timestamp_completed as the start time for the propmap
# is a small problem because then this script won't charge the new samples
# for the current joint-call as:
# joint_call.completed_timestamp > hail_joint_call.started_timestamp
# We might be able to roughly accept this by subtracting days from the
# joint-call completion date (the code below subtracts two), and sort of
# hope that no joint-call runs for longer than that.

dt = datetime.fromisoformat(
analysis['timestamp_completed']
).date() - timedelta(days=2)
# get the changes of samples for a specific day
day_project_delta_sample_map[max(min_date, dt)].add(analysis['sg_id'])

analysis_day_samples: list[tuple[date, set[int]]] = []

for analysis_day, aday_samples in sorted(
day_project_delta_sample_map.items(), key=lambda r: r[0]
):
if len(analysis_day_samples) == 0:
analysis_day_samples.append((analysis_day, aday_samples))
continue

new_samples = aday_samples | analysis_day_samples[-1][1]
analysis_day_samples.append((analysis_day, new_samples))

return analysis_day_samples

0 comments on commit d72d605

Please sign in to comment.