Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Release: Restrict create_test_subset to GVCFs and CRAMs (#852) #853

Merged
merged 1 commit into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion api/graphql/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,8 @@ class GraphQLAnalysis:
active: bool
meta: strawberry.scalars.JSON | None

project_id: strawberry.Private[int]

@staticmethod
def from_internal(internal: AnalysisInternal) -> 'GraphQLAnalysis':
if not internal.id:
Expand All @@ -534,6 +536,7 @@ def from_internal(internal: AnalysisInternal) -> 'GraphQLAnalysis':
timestamp_completed=internal.timestamp_completed,
active=internal.active,
meta=internal.meta,
project_id=internal.project,
)

@strawberry.field
Expand All @@ -547,7 +550,7 @@ async def sequencing_groups(
@strawberry.field
async def project(self, info: Info, root: 'GraphQLAnalysis') -> GraphQLProject:
loader = info.context[LoaderKeys.PROJECTS_FOR_IDS]
project = await loader.load(root.project)
project = await loader.load(root.project_id)
return GraphQLProject.from_internal(project)

@strawberry.field
Expand Down
43 changes: 22 additions & 21 deletions api/routes/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,13 @@
from api.utils.export import ExportType
from db.python.filters import GenericFilter
from db.python.layers.analysis import AnalysisLayer
from db.python.layers.analysis_runner import AnalysisRunnerLayer
from db.python.tables.analysis import AnalysisFilter
from db.python.tables.analysis_runner import AnalysisRunnerFilter
from db.python.tables.project import ProjectPermissionsTable
from models.enums import AnalysisStatus
from models.models.analysis import (
Analysis,
AnalysisInternal,
ProportionalDateTemporalMethod,
)
from models.models.analysis import Analysis, ProportionalDateTemporalMethod
from models.models.analysis_runner import AnalysisRunner
from models.utils.sequencing_group_id_format import (
sequencing_group_id_format,
sequencing_group_id_format_list,
Expand Down Expand Up @@ -242,28 +241,30 @@ async def query_analyses(
async def get_analysis_runner_log(
project_names: list[str] = Query(None), # type: ignore
# author: str = None, # not implemented yet, uncomment when we do
output_dir: str = None,
ar_guid: str = None,
ar_guid: str | None = None,
connection: Connection = get_projectless_db_connection,
) -> list[AnalysisInternal]:
) -> list[AnalysisRunner]:
"""
Get log for the analysis-runner, useful for checking this history of analysis
"""
atable = AnalysisLayer(connection)
project_ids = None
if project_names:
pt = ProjectPermissionsTable(connection)
project_ids = await pt.get_project_ids_from_names_and_user(
connection.author, project_names, readonly=True
)
if not project_names:
raise ValueError('Must specify "project_names"')

arlayer = AnalysisRunnerLayer(connection)
pt = ProjectPermissionsTable(connection)
projects = await pt.get_and_check_access_to_projects_for_names(
connection.author, project_names, readonly=True
)
project_ids = [p.id for p in projects if p.id]
project_map = {p.id: p.name for p in projects if p.id and p.name}

results = await atable.get_analysis_runner_log(
project_ids=project_ids,
# author=author,
output_dir=output_dir,
ar_guid=ar_guid,
results = await arlayer.query(
AnalysisRunnerFilter(
project=GenericFilter(in_=project_ids) if project_ids else None,
ar_guid=GenericFilter(eq=ar_guid) if ar_guid else None,
)
)
return [a.to_external() for a in results]
return [a.to_external(project_map=project_map) for a in results]


@router.get(
Expand Down
17 changes: 0 additions & 17 deletions db/python/layers/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,20 +590,3 @@ async def update_analysis(
meta=meta,
output=output,
)

async def get_analysis_runner_log(
    self,
    project_ids: list[int] | None = None,
    # author: str = None,
    output_dir: str | None = None,
    ar_guid: str | None = None,
) -> list[AnalysisInternal]:
    """
    Get the log for the analysis-runner, useful for checking the history of analyses.

    :param project_ids: restrict results to these project IDs; None means no
        project restriction.
    :param output_dir: match analyses whose output equals this path or ends
        with it (suffix match is applied in the table query).
    :param ar_guid: match only the analysis with this analysis-runner GUID
        (stored in the analysis meta).
    :return: matching analyses as internal models.
    """
    # Thin delegate: all filtering is done by the analysis table layer
    # (self.at); this layer adds no logic of its own.
    return await self.at.get_analysis_runner_log(
        project_ids,
        # author=author,
        output_dir=output_dir,
        ar_guid=ar_guid,
    )
38 changes: 4 additions & 34 deletions db/python/tables/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,10 @@ async def get_sample_cram_path_map_for_seqr(
) -> List[dict[str, str]]:
"""Get (ext_sample_id, cram_path, internal_id) map"""

values: dict[str, Any] = {'project': project, 'PRIMARY_EXTERNAL_ORG': PRIMARY_EXTERNAL_ORG}
values: dict[str, Any] = {
'project': project,
'PRIMARY_EXTERNAL_ORG': PRIMARY_EXTERNAL_ORG,
}
filters = [
'a.active',
'a.type = "cram"',
Expand Down Expand Up @@ -447,39 +450,6 @@ async def get_sample_cram_path_map_for_seqr(
# many per analysis
return [dict(d) for d in rows]

async def get_analysis_runner_log(
    self,
    project_ids: List[int] | None = None,
    # author: str = None,
    output_dir: str | None = None,
    ar_guid: str | None = None,
) -> List[AnalysisInternal]:
    """
    Get the log for the analysis-runner, useful for checking the history of analyses.

    Builds a SELECT over the `analysis` table restricted to active
    'analysis-runner' rows, adding optional filters for project, output
    location, and analysis-runner GUID.

    :param project_ids: if given, only return analyses in these projects.
    :param output_dir: if given, match analyses whose output equals this
        path exactly OR ends with it (LIKE '%<output_dir>' suffix match).
    :param ar_guid: if given, match the ar_guid stored in the JSON meta
        column ($.ar_guid).
    :return: matching rows hydrated into AnalysisInternal models.
    """
    # Bound query parameters, accumulated alongside the WHERE clauses below.
    values: dict[str, Any] = {}
    # Base filters: only analysis-runner entries that are still active.
    wheres = [
        "type = 'analysis-runner'",
        'active',
    ]
    if project_ids:
        wheres.append('project in :project_ids')
        values['project_ids'] = project_ids

    if output_dir:
        # Exact match, or suffix match so callers can pass a trailing
        # directory fragment rather than the full path.
        wheres.append('(output = :output OR output LIKE :output_like)')
        values['output'] = output_dir
        values['output_like'] = f'%{output_dir}'

    if ar_guid:
        # The analysis-runner GUID lives inside the JSON meta column.
        wheres.append('JSON_EXTRACT(meta, "$.ar_guid") = :ar_guid')
        values['ar_guid'] = ar_guid

    wheres_str = ' AND '.join(wheres)
    _query = f'SELECT * FROM analysis WHERE {wheres_str}'
    rows = await self.connection.fetch_all(_query, values)
    return [AnalysisInternal.from_db(**dict(r)) for r in rows]

# region STATS

async def get_number_of_crams_by_sequencing_type(
Expand Down
18 changes: 8 additions & 10 deletions scripts/create_test_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
meta
type
}
analyses {
analyses(type: {in_: ["cram", "gvcf"]}) {
active
id
meta
Expand Down Expand Up @@ -506,9 +506,6 @@ def transfer_analyses(
)
existing_sgid = existing_sg.get('id') if existing_sg else None
for analysis in sg['analyses']:
if analysis['type'] not in ['cram', 'gvcf']:
# Currently the create_test_subset script only handles crams or gvcf files.
continue

existing_analysis: dict = {}
if existing_sgid:
Expand Down Expand Up @@ -867,19 +864,20 @@ def copy_files_in_dict(d, dataset: str, sid_replacement: tuple[str, str] = None)
new_path = new_path.replace(sid_replacement[0], sid_replacement[1])

if not file_exists(new_path):
cmd = f'gsutil cp {old_path!r} {new_path!r}'
logger.info(f'Copying file in metadata: {cmd}')
subprocess.run(cmd, check=False, shell=True)
cmd = ['gcloud', 'storage', 'cp', old_path, new_path]
logger.info(f'Copying file in metadata: {" ".join(cmd)}')
subprocess.run(cmd, check=True)

extra_exts = ['.md5']
if new_path.endswith('.vcf.gz'):
extra_exts.append('.tbi')
if new_path.endswith('.cram'):
extra_exts.append('.crai')
for ext in extra_exts:
if file_exists(old_path + ext) and not file_exists(new_path + ext):
cmd = f'gsutil cp {old_path + ext!r} {new_path + ext!r}'
logger.info(f'Copying extra file in metadata: {cmd}')
subprocess.run(cmd, check=False, shell=True)
cmd = ['gcloud', 'storage', 'cp', old_path + ext, new_path + ext]
logger.info(f'Copying extra file in metadata: {" ".join(cmd)}')
subprocess.run(cmd, check=True)
return new_path
if isinstance(d, list):
return [copy_files_in_dict(x, dataset) for x in d]
Expand Down
36 changes: 36 additions & 0 deletions test/test_graphql.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,3 +394,39 @@ async def test_family_participants(self):
},
],
)

@run_as_sync
async def test_get_project_name_from_analysis(self):
    """An analysis queried through GraphQL should expose its project's name."""
    # Set up a participant with a single sample / sequencing group.
    participant = await self.player.upsert_participant(
        _get_single_participant_upsert()
    )
    sequencing_group_id = participant.samples[0].sequencing_groups[0].id

    # Attach a completed CRAM analysis to that sequencing group.
    analysis_layer = AnalysisLayer(self.connection)
    await analysis_layer.create_analysis(
        AnalysisInternal(
            sequencing_group_ids=[sequencing_group_id],
            type='cram',
            status=AnalysisStatus.COMPLETED,
            meta={},
            output='some-output',
        )
    )

    query = """
    query MyQuery($sg_id: String!) {
      sequencingGroups(id: {eq: $sg_id}) {
        analyses {
          id
          project {
            name
          }
        }
      }
    }"""

    response = await self.run_graphql_query_async(
        query, {'sg_id': sequencing_group_id_format(sequencing_group_id)}
    )
    self.assertIn('sequencingGroups', response)
    returned_name = response['sequencingGroups'][0]['analyses'][0]['project']['name']
    self.assertEqual(self.project_name, returned_name)