Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Release: Restrict create_test_subset to GVCFs and CRAMs (#852) #853

Merged
merged 1 commit into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion api/graphql/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,8 @@ class GraphQLAnalysis:
active: bool
meta: strawberry.scalars.JSON | None

project_id: strawberry.Private[int]

@staticmethod
def from_internal(internal: AnalysisInternal) -> 'GraphQLAnalysis':
if not internal.id:
Expand All @@ -534,6 +536,7 @@ def from_internal(internal: AnalysisInternal) -> 'GraphQLAnalysis':
timestamp_completed=internal.timestamp_completed,
active=internal.active,
meta=internal.meta,
project_id=internal.project,
)

@strawberry.field
Expand All @@ -547,7 +550,7 @@ async def sequencing_groups(
@strawberry.field
async def project(self, info: Info, root: 'GraphQLAnalysis') -> GraphQLProject:
loader = info.context[LoaderKeys.PROJECTS_FOR_IDS]
project = await loader.load(root.project)
project = await loader.load(root.project_id)
return GraphQLProject.from_internal(project)

@strawberry.field
Expand Down
43 changes: 22 additions & 21 deletions api/routes/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,13 @@
from api.utils.export import ExportType
from db.python.filters import GenericFilter
from db.python.layers.analysis import AnalysisLayer
from db.python.layers.analysis_runner import AnalysisRunnerLayer
from db.python.tables.analysis import AnalysisFilter
from db.python.tables.analysis_runner import AnalysisRunnerFilter
from db.python.tables.project import ProjectPermissionsTable
from models.enums import AnalysisStatus
from models.models.analysis import (
Analysis,
AnalysisInternal,
ProportionalDateTemporalMethod,
)
from models.models.analysis import Analysis, ProportionalDateTemporalMethod
from models.models.analysis_runner import AnalysisRunner
from models.utils.sequencing_group_id_format import (
sequencing_group_id_format,
sequencing_group_id_format_list,
Expand Down Expand Up @@ -242,28 +241,30 @@ async def query_analyses(
async def get_analysis_runner_log(
project_names: list[str] = Query(None), # type: ignore
# author: str = None, # not implemented yet, uncomment when we do
output_dir: str = None,
ar_guid: str = None,
ar_guid: str | None = None,
connection: Connection = get_projectless_db_connection,
) -> list[AnalysisInternal]:
) -> list[AnalysisRunner]:
"""
Get log for the analysis-runner, useful for checking this history of analysis
"""
atable = AnalysisLayer(connection)
project_ids = None
if project_names:
pt = ProjectPermissionsTable(connection)
project_ids = await pt.get_project_ids_from_names_and_user(
connection.author, project_names, readonly=True
)
if not project_names:
raise ValueError('Must specify "project_names"')

arlayer = AnalysisRunnerLayer(connection)
pt = ProjectPermissionsTable(connection)
projects = await pt.get_and_check_access_to_projects_for_names(
connection.author, project_names, readonly=True
)
project_ids = [p.id for p in projects if p.id]
project_map = {p.id: p.name for p in projects if p.id and p.name}

results = await atable.get_analysis_runner_log(
project_ids=project_ids,
# author=author,
output_dir=output_dir,
ar_guid=ar_guid,
results = await arlayer.query(
AnalysisRunnerFilter(
project=GenericFilter(in_=project_ids) if project_ids else None,
ar_guid=GenericFilter(eq=ar_guid) if ar_guid else None,
)
)
return [a.to_external() for a in results]
return [a.to_external(project_map=project_map) for a in results]


@router.get(
Expand Down
17 changes: 0 additions & 17 deletions db/python/layers/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,20 +590,3 @@ async def update_analysis(
meta=meta,
output=output,
)

async def get_analysis_runner_log(
    self,
    project_ids: list[int] | None = None,
    # author: str = None,
    output_dir: str | None = None,
    ar_guid: str | None = None,
) -> list[AnalysisInternal]:
    """
    Get the log for the analysis-runner, useful for checking the history of analyses.

    :param project_ids: restrict results to these project IDs; None means no
        project restriction.
    :param output_dir: match analyses whose output equals this path or ends
        with it (suffix match is applied in the table query).
    :param ar_guid: match only the analysis with this analysis-runner GUID
        (stored in the analysis meta).
    :return: matching analyses as internal models.
    """
    # Thin delegate: all filtering is done by the analysis table layer
    # (self.at); this layer adds no logic of its own.
    return await self.at.get_analysis_runner_log(
        project_ids,
        # author=author,
        output_dir=output_dir,
        ar_guid=ar_guid,
    )
38 changes: 4 additions & 34 deletions db/python/tables/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,10 @@ async def get_sample_cram_path_map_for_seqr(
) -> List[dict[str, str]]:
"""Get (ext_sample_id, cram_path, internal_id) map"""

values: dict[str, Any] = {'project': project, 'PRIMARY_EXTERNAL_ORG': PRIMARY_EXTERNAL_ORG}
values: dict[str, Any] = {
'project': project,
'PRIMARY_EXTERNAL_ORG': PRIMARY_EXTERNAL_ORG,
}
filters = [
'a.active',
'a.type = "cram"',
Expand Down Expand Up @@ -447,39 +450,6 @@ async def get_sample_cram_path_map_for_seqr(
# many per analysis
return [dict(d) for d in rows]

async def get_analysis_runner_log(
    self,
    project_ids: List[int] | None = None,
    # author: str = None,
    output_dir: str | None = None,
    ar_guid: str | None = None,
) -> List[AnalysisInternal]:
    """
    Get the log for the analysis-runner, useful for checking the history of analyses.

    Builds a SELECT over the `analysis` table restricted to active
    'analysis-runner' rows, adding optional filters for project, output
    location, and analysis-runner GUID.

    :param project_ids: if given, only return analyses in these projects.
    :param output_dir: if given, match analyses whose output equals this
        path exactly OR ends with it (LIKE '%<output_dir>' suffix match).
    :param ar_guid: if given, match the ar_guid stored in the JSON meta
        column ($.ar_guid).
    :return: matching rows hydrated into AnalysisInternal models.
    """
    # Bound query parameters, accumulated alongside the WHERE clauses below.
    values: dict[str, Any] = {}
    # Base filters: only analysis-runner entries that are still active.
    wheres = [
        "type = 'analysis-runner'",
        'active',
    ]
    if project_ids:
        wheres.append('project in :project_ids')
        values['project_ids'] = project_ids

    if output_dir:
        # Exact match, or suffix match so callers can pass a trailing
        # directory fragment rather than the full path.
        wheres.append('(output = :output OR output LIKE :output_like)')
        values['output'] = output_dir
        values['output_like'] = f'%{output_dir}'

    if ar_guid:
        # The analysis-runner GUID lives inside the JSON meta column.
        wheres.append('JSON_EXTRACT(meta, "$.ar_guid") = :ar_guid')
        values['ar_guid'] = ar_guid

    wheres_str = ' AND '.join(wheres)
    _query = f'SELECT * FROM analysis WHERE {wheres_str}'
    rows = await self.connection.fetch_all(_query, values)
    return [AnalysisInternal.from_db(**dict(r)) for r in rows]

# region STATS

async def get_number_of_crams_by_sequencing_type(
Expand Down
18 changes: 8 additions & 10 deletions scripts/create_test_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
meta
type
}
analyses {
analyses(type: {in_: ["cram", "gvcf"]}) {
active
id
meta
Expand Down Expand Up @@ -506,9 +506,6 @@ def transfer_analyses(
)
existing_sgid = existing_sg.get('id') if existing_sg else None
for analysis in sg['analyses']:
if analysis['type'] not in ['cram', 'gvcf']:
# Currently the create_test_subset script only handles crams or gvcf files.
continue

existing_analysis: dict = {}
if existing_sgid:
Expand Down Expand Up @@ -867,19 +864,20 @@ def copy_files_in_dict(d, dataset: str, sid_replacement: tuple[str, str] = None)
new_path = new_path.replace(sid_replacement[0], sid_replacement[1])

if not file_exists(new_path):
cmd = f'gsutil cp {old_path!r} {new_path!r}'
logger.info(f'Copying file in metadata: {cmd}')
subprocess.run(cmd, check=False, shell=True)
cmd = ['gcloud', 'storage', 'cp', old_path, new_path]
logger.info(f'Copying file in metadata: {" ".join(cmd)}')
subprocess.run(cmd, check=True)

extra_exts = ['.md5']
if new_path.endswith('.vcf.gz'):
extra_exts.append('.tbi')
if new_path.endswith('.cram'):
extra_exts.append('.crai')
for ext in extra_exts:
if file_exists(old_path + ext) and not file_exists(new_path + ext):
cmd = f'gsutil cp {old_path + ext!r} {new_path + ext!r}'
logger.info(f'Copying extra file in metadata: {cmd}')
subprocess.run(cmd, check=False, shell=True)
cmd = ['gcloud', 'storage', 'cp', old_path + ext, new_path + ext]
logger.info(f'Copying extra file in metadata: {" ".join(cmd)}')
subprocess.run(cmd, check=True)
return new_path
if isinstance(d, list):
return [copy_files_in_dict(x, dataset) for x in d]
Expand Down
36 changes: 36 additions & 0 deletions test/test_graphql.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,3 +394,39 @@ async def test_family_participants(self):
},
],
)

@run_as_sync
async def test_get_project_name_from_analysis(self):
    """An analysis queried through GraphQL should expose its project's name."""
    # Set up a participant with a single sample / sequencing group.
    participant = await self.player.upsert_participant(
        _get_single_participant_upsert()
    )
    sequencing_group_id = participant.samples[0].sequencing_groups[0].id

    # Attach a completed CRAM analysis to that sequencing group.
    analysis_layer = AnalysisLayer(self.connection)
    await analysis_layer.create_analysis(
        AnalysisInternal(
            sequencing_group_ids=[sequencing_group_id],
            type='cram',
            status=AnalysisStatus.COMPLETED,
            meta={},
            output='some-output',
        )
    )

    query = """
    query MyQuery($sg_id: String!) {
      sequencingGroups(id: {eq: $sg_id}) {
        analyses {
          id
          project {
            name
          }
        }
      }
    }"""

    response = await self.run_graphql_query_async(
        query, {'sg_id': sequencing_group_id_format(sequencing_group_id)}
    )
    self.assertIn('sequencingGroups', response)
    returned_name = response['sequencingGroups'][0]['analyses'][0]['project']['name']
    self.assertEqual(self.project_name, returned_name)