diff --git a/api/graphql/schema.py b/api/graphql/schema.py index a234fa90a..6f57a6574 100644 --- a/api/graphql/schema.py +++ b/api/graphql/schema.py @@ -522,6 +522,8 @@ class GraphQLAnalysis: active: bool meta: strawberry.scalars.JSON | None + project_id: strawberry.Private[int] + @staticmethod def from_internal(internal: AnalysisInternal) -> 'GraphQLAnalysis': if not internal.id: @@ -534,6 +536,7 @@ def from_internal(internal: AnalysisInternal) -> 'GraphQLAnalysis': timestamp_completed=internal.timestamp_completed, active=internal.active, meta=internal.meta, + project_id=internal.project, ) @strawberry.field @@ -547,7 +550,7 @@ async def sequencing_groups( @strawberry.field async def project(self, info: Info, root: 'GraphQLAnalysis') -> GraphQLProject: loader = info.context[LoaderKeys.PROJECTS_FOR_IDS] - project = await loader.load(root.project) + project = await loader.load(root.project_id) return GraphQLProject.from_internal(project) @strawberry.field diff --git a/api/routes/analysis.py b/api/routes/analysis.py index 12e73820b..86b37d775 100644 --- a/api/routes/analysis.py +++ b/api/routes/analysis.py @@ -18,14 +18,13 @@ from api.utils.export import ExportType from db.python.filters import GenericFilter from db.python.layers.analysis import AnalysisLayer +from db.python.layers.analysis_runner import AnalysisRunnerLayer from db.python.tables.analysis import AnalysisFilter +from db.python.tables.analysis_runner import AnalysisRunnerFilter from db.python.tables.project import ProjectPermissionsTable from models.enums import AnalysisStatus -from models.models.analysis import ( - Analysis, - AnalysisInternal, - ProportionalDateTemporalMethod, -) +from models.models.analysis import Analysis, ProportionalDateTemporalMethod +from models.models.analysis_runner import AnalysisRunner from models.utils.sequencing_group_id_format import ( sequencing_group_id_format, sequencing_group_id_format_list, @@ -242,28 +241,30 @@ async def query_analyses( async def get_analysis_runner_log( project_names: list[str] = Query(None), # type: ignore # author: str = None, # not implemented yet, uncomment when we do - output_dir: str = None, - ar_guid: str = None, + ar_guid: str | None = None, connection: Connection = get_projectless_db_connection, -) -> list[AnalysisInternal]: +) -> list[AnalysisRunner]: """ Get log for the analysis-runner, useful for checking this history of analysis """ - atable = AnalysisLayer(connection) - project_ids = None - if project_names: - pt = ProjectPermissionsTable(connection) - project_ids = await pt.get_project_ids_from_names_and_user( - connection.author, project_names, readonly=True - ) + if not project_names: + raise ValueError('Must specify "project_names"') + + arlayer = AnalysisRunnerLayer(connection) + pt = ProjectPermissionsTable(connection) + projects = await pt.get_and_check_access_to_projects_for_names( + connection.author, project_names, readonly=True + ) + project_ids = [p.id for p in projects if p.id] + project_map = {p.id: p.name for p in projects if p.id and p.name} - results = await atable.get_analysis_runner_log( - project_ids=project_ids, - # author=author, - output_dir=output_dir, - ar_guid=ar_guid, + results = await arlayer.query( + AnalysisRunnerFilter( + project=GenericFilter(in_=project_ids) if project_ids else None, + ar_guid=GenericFilter(eq=ar_guid) if ar_guid else None, + ) ) - return [a.to_external() for a in results] + return [a.to_external(project_map=project_map) for a in results] @router.get( diff --git a/db/python/layers/analysis.py b/db/python/layers/analysis.py index a7eb65d49..60f216d85 100644 --- a/db/python/layers/analysis.py +++ b/db/python/layers/analysis.py @@ -590,20 +590,3 @@ async def update_analysis( meta=meta, output=output, ) - - async def get_analysis_runner_log( - self, - project_ids: list[int] = None, - # author: str = None, - output_dir: str = None, - ar_guid: str = None, - ) -> list[AnalysisInternal]: - """ - Get log for the analysis-runner, useful for checking this history of analysis - """ - return await self.at.get_analysis_runner_log( - project_ids, - # author=author, - output_dir=output_dir, - ar_guid=ar_guid, - ) diff --git a/db/python/tables/analysis.py b/db/python/tables/analysis.py index cb6ebee16..16765c678 100644 --- a/db/python/tables/analysis.py +++ b/db/python/tables/analysis.py @@ -409,7 +409,10 @@ async def get_sample_cram_path_map_for_seqr( ) -> List[dict[str, str]]: """Get (ext_sample_id, cram_path, internal_id) map""" - values: dict[str, Any] = {'project': project, 'PRIMARY_EXTERNAL_ORG': PRIMARY_EXTERNAL_ORG} + values: dict[str, Any] = { + 'project': project, + 'PRIMARY_EXTERNAL_ORG': PRIMARY_EXTERNAL_ORG, + } filters = [ 'a.active', 'a.type = "cram"', @@ -447,39 +450,6 @@ async def get_sample_cram_path_map_for_seqr( # many per analysis return [dict(d) for d in rows] - async def get_analysis_runner_log( - self, - project_ids: List[int] = None, - # author: str = None, - output_dir: str = None, - ar_guid: str = None, - ) -> List[AnalysisInternal]: - """ - Get log for the analysis-runner, useful for checking this history of analysis - """ - values: dict[str, Any] = {} - wheres = [ - "type = 'analysis-runner'", - 'active', - ] - if project_ids: - wheres.append('project in :project_ids') - values['project_ids'] = project_ids - - if output_dir: - wheres.append('(output = :output OR output LIKE :output_like)') - values['output'] = output_dir - values['output_like'] = f'%{output_dir}' - - if ar_guid: - wheres.append('JSON_EXTRACT(meta, "$.ar_guid") = :ar_guid') - values['ar_guid'] = ar_guid - - wheres_str = ' AND '.join(wheres) - _query = f'SELECT * FROM analysis WHERE {wheres_str}' - rows = await self.connection.fetch_all(_query, values) - return [AnalysisInternal.from_db(**dict(r)) for r in rows] - # region STATS async def get_number_of_crams_by_sequencing_type( diff --git a/scripts/create_test_subset.py b/scripts/create_test_subset.py index e350cd3d2..ad98ad97c 100755 --- a/scripts/create_test_subset.py +++ b/scripts/create_test_subset.py @@ -75,7 +75,7 @@ meta type } - analyses { + analyses(type: {in_: ["cram", "gvcf"]}) { active id meta @@ -506,9 +506,6 @@ def transfer_analyses( ) existing_sgid = existing_sg.get('id') if existing_sg else None for analysis in sg['analyses']: - if analysis['type'] not in ['cram', 'gvcf']: - # Currently the create_test_subset script only handles crams or gvcf files. - continue existing_analysis: dict = {} if existing_sgid: @@ -867,9 +864,10 @@ def copy_files_in_dict(d, dataset: str, sid_replacement: tuple[str, str] = None) new_path = new_path.replace(sid_replacement[0], sid_replacement[1]) if not file_exists(new_path): - cmd = f'gsutil cp {old_path!r} {new_path!r}' - logger.info(f'Copying file in metadata: {cmd}') - subprocess.run(cmd, check=False, shell=True) + cmd = ['gcloud', 'storage', 'cp', old_path, new_path] + logger.info(f'Copying file in metadata: {" ".join(cmd)}') + subprocess.run(cmd, check=True) + extra_exts = ['.md5'] if new_path.endswith('.vcf.gz'): extra_exts.append('.tbi') @@ -877,9 +875,9 @@ def copy_files_in_dict(d, dataset: str, sid_replacement: tuple[str, str] = None) extra_exts.append('.crai') for ext in extra_exts: if file_exists(old_path + ext) and not file_exists(new_path + ext): - cmd = f'gsutil cp {old_path + ext!r} {new_path + ext!r}' - logger.info(f'Copying extra file in metadata: {cmd}') - subprocess.run(cmd, check=False, shell=True) + cmd = ['gcloud', 'storage', 'cp', old_path + ext, new_path + ext] + logger.info(f'Copying extra file in metadata: {" ".join(cmd)}') + subprocess.run(cmd, check=True) return new_path if isinstance(d, list): return [copy_files_in_dict(x, dataset) for x in d] diff --git a/test/test_graphql.py b/test/test_graphql.py index 41ad61e14..703da2755 100644 --- a/test/test_graphql.py +++ b/test/test_graphql.py @@ -394,3 +394,39 @@ async def test_family_participants(self): }, ], ) + + @run_as_sync + async def test_get_project_name_from_analysis(self): + """Test getting project name from analysis""" + p = await self.player.upsert_participant(_get_single_participant_upsert()) + sg_id = p.samples[0].sequencing_groups[0].id + + alayer = AnalysisLayer(self.connection) + await alayer.create_analysis( + AnalysisInternal( + sequencing_group_ids=[sg_id], + type='cram', + status=AnalysisStatus.COMPLETED, + meta={}, + output='some-output', + ) + ) + + q = """ +query MyQuery($sg_id: String!) { + sequencingGroups(id: {eq: $sg_id}) { + analyses { + id + project { + name + } + } + } +}""" + + resp = await self.run_graphql_query_async( + q, {'sg_id': sequencing_group_id_format(sg_id)} + ) + self.assertIn('sequencingGroups', resp) + project_name = resp['sequencingGroups'][0]['analyses'][0]['project']['name'] + self.assertEqual(self.project_name, project_name)