Skip to content

Commit

Permalink
Merge pull request #822 from populationgenomics/dev
Browse files Browse the repository at this point in the history
- Multiple external IDs for participants and samples (with UI hotfix)
- Fix epid/esid mismatch
  • Loading branch information
jmarshall authored Jun 13, 2024
2 parents ebaf378 + 71dddbe commit 7ca0ee5
Show file tree
Hide file tree
Showing 47 changed files with 1,018 additions and 261 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 7.0.4
current_version = 7.1.0
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>[A-z0-9-]+)
Expand Down
5 changes: 3 additions & 2 deletions api/graphql/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from db.python.utils import GenericFilter
from models.enums import AnalysisStatus
from models.models import (
PRIMARY_EXTERNAL_ORG,
AnalysisInternal,
AssayInternal,
AuditLogInternal,
Expand Down Expand Up @@ -636,7 +637,7 @@ class GraphQLParticipant:
def from_internal(internal: ParticipantInternal) -> 'GraphQLParticipant':
return GraphQLParticipant(
id=internal.id,
external_id=internal.external_id,
external_id=internal.external_ids[PRIMARY_EXTERNAL_ORG],
meta=internal.meta,
reported_sex=internal.reported_sex,
reported_gender=internal.reported_gender,
Expand Down Expand Up @@ -725,7 +726,7 @@ class GraphQLSample:
def from_internal(sample: SampleInternal) -> 'GraphQLSample':
return GraphQLSample(
id=sample_id_format(sample.id),
external_id=sample.external_id,
external_id=sample.external_ids[PRIMARY_EXTERNAL_ORG],
active=sample.active,
meta=sample.meta,
type=sample.type,
Expand Down
2 changes: 1 addition & 1 deletion api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from db.python.utils import get_logger

# This tag is automatically updated by bump2version
_VERSION = '7.0.4'
_VERSION = '7.1.0'


logger = get_logger()
Expand Down
139 changes: 139 additions & 0 deletions db/project.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1281,4 +1281,143 @@
<sql>ALTER TABLE cohort_sequencing_group ADD SYSTEM VERSIONING;</sql>
<sql>ALTER TABLE analysis_cohort ADD SYSTEM VERSIONING;</sql>
</changeSet>
<changeSet id="2024-05-03-multiple-external-ids" author="john.marshall">
<createTable tableName="participant_external_id">
<column name="project" type="INT">
<constraints
nullable="false"
foreignKeyName="FK_PROJECT_PARTICIPANT_EXTERNAL_ID"
references="project(id)" />
</column>
<column name="participant_id" type="INT">
<constraints
nullable="false"
foreignKeyName="FK_PARTICIPANT_PARTICIPANT_EXTERNAL_ID"
references="participant(id)" />
</column>
<column name="name" type="VARCHAR(255)" />
<column name="external_id" type="VARCHAR(255)">
<constraints nullable="false" />
</column>
<column name="meta" type="LONGTEXT" />
<column name="audit_log_id" type="INT">
<constraints
nullable="false"
foreignKeyName="FK_PARTICIPANT_EXTERNAL_ID_CHANGELOG_ID"
references="audit_log(id)" />
</column>
</createTable>
<addPrimaryKey
tableName="participant_external_id"
columnNames="participant_id,name"
constraintName="PK_PARTICIPANT_EXTERNAL_ID"
validate="true"
/>
<addUniqueConstraint
tableName="participant_external_id"
columnNames="project,external_id"
constraintName="UK_PARTICIPANT_EXTERNAL_ID_UNIQUE_EIDS"
validate="true"
/>
<createIndex tableName="participant" indexName="fk_project_participant">
<column name="project" />
</createIndex>
<dropUniqueConstraint
tableName="participant"
constraintName="UK_PARTICIPANT_SAMPLE_EXTERNALID"
/>

<createTable tableName="sample_external_id">
<column name="project" type="INT">
<constraints
nullable="false"
foreignKeyName="FK_PROJECT_SAMPLE_EXTERNAL_ID"
references="project(id)" />
</column>
<column name="sample_id" type="INT">
<constraints
nullable="false"
foreignKeyName="FK_SAMPLE_SAMPLE_EXTERNAL_ID"
references="sample(id)" />
</column>
<column name="name" type="VARCHAR(255)" />
<column name="external_id" type="VARCHAR(255)">
<constraints nullable="false" />
</column>
<column name="meta" type="LONGTEXT" />
<column name="audit_log_id" type="INT">
<constraints
nullable="false"
foreignKeyName="FK_SAMPLE_EXTERNAL_ID_CHANGELOG_ID"
references="audit_log(id)" />
</column>
</createTable>
<addPrimaryKey
tableName="sample_external_id"
columnNames="sample_id,name"
constraintName="PK_SAMPLE_EXTERNAL_ID"
validate="true"
/>
<addUniqueConstraint
tableName="sample_external_id"
columnNames="project,external_id"
constraintName="UK_SAMPLE_EXTERNAL_ID_UNIQUE_EIDS"
validate="true"
/>
<createIndex tableName="sample" indexName="fk_project_sample">
<column name="project" />
</createIndex>
<dropUniqueConstraint
tableName="sample"
constraintName="UK_SAMPLE_PROJECT_EXTERNALID"
/>

<sql>ALTER TABLE participant_external_id ADD SYSTEM VERSIONING;</sql>
<sql>ALTER TABLE sample_external_id ADD SYSTEM VERSIONING;</sql>

<!-- Migrate existing external_ids to the new tables, keyed by PRIMARY_EXTERNAL_ORG, i.e. '' -->
<sql>INSERT INTO audit_log (author, on_behalf_of, ar_guid, comment, auth_project, meta)
VALUES ('liquibase', NULL, NULL, 'participant/sample external_id migration', NULL, NULL)
RETURNING @audit_log_id := id;

INSERT INTO participant_external_id (project, participant_id, name, external_id, audit_log_id)
SELECT project, id, '', external_id, @audit_log_id
FROM participant;

INSERT INTO sample_external_id (project, sample_id, name, external_id, audit_log_id)
SELECT project, id, '', external_id, @audit_log_id
FROM sample;
</sql>
</changeSet>
<changeSet id="2024-06-11-drop-old-external-id-columns" author="john.marshall">
<sql>SET @@system_versioning_alter_history = 1;</sql>

<dropNotNullConstraint
tableName="participant"
columnName="external_id"
columnDataType="VARCHAR(255)"
/>
<sql>UPDATE participant SET external_id = NULL</sql>
<renameColumn
tableName="participant"
oldColumnName="external_id"
newColumnName="_external_id_unused"
columnDataType="VARCHAR(255)"
remarks="Migration of external IDs to separate table"
/>

<dropNotNullConstraint
tableName="sample"
columnName="external_id"
columnDataType="VARCHAR(255)"
/>
<sql>UPDATE sample SET external_id = NULL</sql>
<renameColumn
tableName="sample"
oldColumnName="external_id"
newColumnName="_external_id_unused"
columnDataType="VARCHAR(255)"
remarks="Migration of external IDs to separate table"
/>
</changeSet>
</databaseChangeLog>
2 changes: 2 additions & 0 deletions db/python/connect.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
'analysis_sequencing_group',
'analysis_sample',
'assay_external_id',
'participant_external_id',
'sample_external_id',
'sequencing_group_external_id',
'family',
'family_participant',
Expand Down
4 changes: 2 additions & 2 deletions db/python/layers/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
from db.python.tables.participant import ParticipantTable
from db.python.tables.sample import SampleTable
from db.python.utils import GenericFilter, NotFoundError
from models.models import PRIMARY_EXTERNAL_ORG, ProjectId
from models.models.family import FamilyInternal, PedRow, PedRowInternal
from models.models.participant import ParticipantUpsertInternal
from models.models.project import ProjectId


class FamilyLayer(BaseLayer):
Expand Down Expand Up @@ -299,7 +299,7 @@ async def import_pedigree(
continue
upserted_participant = await participant_table.upsert_participant(
ParticipantUpsertInternal(
external_id=row.individual_id,
external_ids={PRIMARY_EXTERNAL_ORG: row.individual_id},
reported_sex=row.sex,
)
)
Expand Down
16 changes: 10 additions & 6 deletions db/python/layers/participant.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
NotFoundError,
split_generic_terms,
)
from models.models import PRIMARY_EXTERNAL_ORG, ProjectId
from models.models.family import PedRowInternal
from models.models.participant import ParticipantInternal, ParticipantUpsertInternal
from models.models.project import ProjectId

HPO_REGEX_MATCHER = re.compile(r'HP\:\d+$')

Expand Down Expand Up @@ -250,6 +250,7 @@ async def get_participants_by_ids(
) -> list[ParticipantInternal]:
"""
Get participants by IDs
(note that the list returned need not be ordered as per the pids argument)
"""
projects, participants = await self.pttable.get_participants_by_ids(pids)

Expand Down Expand Up @@ -301,7 +302,10 @@ async def fill_in_missing_participants(self):
)
)
external_sample_map_with_no_pid = {
sample.external_id: sample.id for sample in samples_with_no_pid
sample.external_ids[PRIMARY_EXTERNAL_ORG]: sample.id for sample in samples_with_no_pid
}
external_ids_by_primary = {
sample.external_ids[PRIMARY_EXTERNAL_ORG]: sample.external_ids for sample in samples_with_no_pid
}
ext_sample_id_to_pid = {}

Expand All @@ -328,7 +332,7 @@ async def fill_in_missing_participants(self):
for external_id in external_participant_ids_to_add:
sample_id = external_sample_map_with_no_pid[external_id]
participant_id = await self.pttable.create_participant(
external_id=external_id,
external_ids=external_ids_by_primary[external_id],
reported_sex=None,
reported_gender=None,
karyotype=None,
Expand Down Expand Up @@ -410,7 +414,7 @@ async def generic_individual_metadata_importer(
)
for ex_pid in missing_participant_eids:
external_pid_map[ex_pid] = await self.pttable.create_participant(
external_id=ex_pid,
external_ids={PRIMARY_EXTERNAL_ORG: ex_pid},
reported_sex=None,
reported_gender=None,
karyotype=None,
Expand Down Expand Up @@ -605,7 +609,7 @@ async def upsert_participant(
)
await self.pttable.update_participant(
participant_id=participant.id,
external_id=participant.external_id,
external_ids=participant.external_ids,
reported_sex=participant.reported_sex,
reported_gender=participant.reported_gender,
meta=participant.meta,
Expand All @@ -614,7 +618,7 @@ async def upsert_participant(

else:
participant.id = await self.pttable.create_participant(
external_id=participant.external_id,
external_ids=participant.external_ids,
reported_sex=participant.reported_sex,
reported_gender=participant.reported_gender,
karyotype=participant.karyotype,
Expand Down
8 changes: 4 additions & 4 deletions db/python/layers/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ async def get_sample_by_id(
async def get_single_by_external_id(
self, external_id, project: ProjectId, check_active=True
) -> SampleInternal:
"""Get a Sample by its external_id"""
"""Get a Sample by (any of) its external_id(s)"""
return await self.st.get_single_by_external_id(
external_id, project, check_active=check_active
)
Expand All @@ -102,7 +102,7 @@ async def get_sample_id_map_by_external_ids(
project: ProjectId = None,
allow_missing=False,
) -> dict[str, int]:
"""Get map of samples {external_id: internal_id}"""
"""Get map of samples {(any) external_id: internal_id}"""
external_ids_set = set(external_ids)
_project = project or self.connection.project
assert _project
Expand Down Expand Up @@ -231,7 +231,7 @@ async def upsert_sample(
async with with_function():
if not sample.id:
sample.id = await self.st.insert_sample(
external_id=sample.external_id,
external_ids=sample.external_ids,
sample_type=sample.type,
active=True,
meta=sample.meta,
Expand All @@ -242,7 +242,7 @@ async def upsert_sample(
# Otherwise update
await self.st.update_sample(
id_=sample.id, # type: ignore
external_id=sample.external_id,
external_ids=sample.external_ids,
meta=sample.meta,
participant_id=sample.participant_id,
type_=sample.type,
Expand Down
2 changes: 1 addition & 1 deletion db/python/layers/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ async def _get_search_result_for_sample(
except NotFoundError:
return None

sample_eids = [sample.external_id]
sample_eids = list(sample.external_ids.values())
participant_id = int(sample.participant_id) if sample.participant_id else None
participant_eids: list[str] = []
family_eids: list[str] = []
Expand Down
Loading

0 comments on commit 7ca0ee5

Please sign in to comment.