From 74badcffac911279a9e49d9b85813675877deea1 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Thu, 20 May 2021 16:09:10 +1200
Subject: [PATCH 01/12] Working implementation of scaffold search

---
 app/config.py             | 2 +-
 app/process_kb_results.py | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/app/config.py b/app/config.py
index 83616bba..17e59e1a 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_Datasets_pr")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
     SIM_CORE_TECH_LEAD_WRIKE_ID = os.environ.get("SIM_CORE_TECH_LEAD_WRIKE_ID")
diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 86320309..29a7026a 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -137,6 +137,7 @@ def process_kb_results(results):
         attr = get_attributes(attributes, hit)
         attr['doi'] = convert_doi_to_url(attr['doi'])
         attr['csvFiles'] = find_csv_files(attr['csvFiles'])
+        attr['scaffolds'] = find_scaffold_json_files(hit['_source']['objects'])
         output.append(attr)
     return json.dumps({'numberOfHits': results['hits']['total'], 'results': output})
 
@@ -153,6 +154,12 @@ def find_csv_files(obj_list):
     return [obj for obj in obj_list if obj.get('mimetype', 'none') == 'text/csv']
 
 
+def find_scaffold_json_files(obj_list):
+    if not obj_list:
+        return obj_list
+    return [obj for obj in obj_list if obj.get('mimetype', 'none') == 'application/json' and "metadata.json" in obj.get('dataset', 'none')['path']]
+
+
 # get_attributes: Use 'attributes' (defined at top of this document) to step through the large scicrunch result dict
 #  and cherrypick the attributes of interest
 def get_attributes(attributes, dataset):

From 5e3509607cd9421d1969a9156944f67d3f073e6c Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 1 Jun 2021 23:52:42 +1200
Subject: [PATCH 02/12] Add endpoint to retrieve doi from scicrunch

---
 app/config.py             |  2 +-
 app/main.py               | 14 ++++++++++++++
 app/process_kb_results.py | 22 ++++++++++++++++++++++
 main.py                   |  2 +-
 tests/test_scicrunch.py   |  5 +++++
 5 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/app/config.py b/app/config.py
index 4b2fa5cb..0684428a 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_Datasets_pr")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
diff --git a/app/main.py b/app/main.py
index 178ffd67..f3f1f171 100644
--- a/app/main.py
+++ b/app/main.py
@@ -210,6 +210,20 @@ def direct_download_url(path):
     resource = response["Body"].read()
     return resource
 
+# /scicrunch-dataset/: Returns the raw scicrunch response for the dataset identified by <doi1>/<doi2>
+@app.route("/scicrunch-dataset/<doi1>/<doi2>")
+def sci_doi(doi1,doi2):
+    doi = doi1 + '/' + doi2
+    print(doi)
+    data = create_doi_request(doi)
+    try:
+        response = requests.post(
+            f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',
+            json=data)
+        return response.json()
+    except requests.exceptions.HTTPError as err:
+        logging.error(err)
+        return json.dumps({'error': err})
 
 # /search/: Returns scicrunch results for a given <search> query
 @app.route("/search/", defaults={'query': ''})
diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 86320309..76e9612c 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -17,6 +17,23 @@
     'csvFiles': ['objects']
 }
 
+def create_doi_request(doi):
+
+    query = {
+        "query": {
+            "bool": {
+                "must": [{"match_all": {}}],
+                "should": [],
+                "filter": {
+                    "term": {
+                        "_id": doi
+                    }
+                }
+            }
+        }
+    }
+
+    return query
 
 # create_facet_query(type): Generates facet search request data for scicrunch  given a 'type'; where
 # 'type' is either 'species', 'gender', or 'genotype' at this stage.
@@ -146,6 +163,11 @@ def convert_doi_to_url(doi):
         return doi
     return doi.replace('DOI:', 'https://doi.org/')
 
+def convert_url_to_doi(doi):
+    if not doi:
+        return doi
+    return doi.replace('https://doi.org/', 'DOI:')
+
 
 def find_csv_files(obj_list):
     if not obj_list:
diff --git a/main.py b/main.py
index aa5c9d16..475dcbd8 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,4 @@
 from app.main import app
 
 if __name__ == "__main__":
-    app.run(host="0.0.0.0")
+    app.run(host="0.0.0.0", port=8000)
diff --git a/tests/test_scicrunch.py b/tests/test_scicrunch.py
index bdc650e0..bd68c253 100644
--- a/tests/test_scicrunch.py
+++ b/tests/test_scicrunch.py
@@ -13,6 +13,11 @@ def test_scicrunch_keys(client):
     assert r.status_code == 200
     assert 'numberOfHits' in json.loads(r.data).keys()
 
+def test_scicrunch_dataset_doi(client):
+    r = client.get('/scicrunch-dataset/DOI%3A10.26275%2Fpzek-91wx')
+    assert json.loads(r.data)['hits']['hits'][0]['_id'] == "DOI:10.26275/pzek-91wx"
+
+
 def test_scicrunch_search(client):
     r = client.get('/search/heart')
     assert r.status_code == 200

From 9b37fc6f9e2a3a62871be6dad7fe06c069f0750b Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Wed, 2 Jun 2021 00:06:31 +1200
Subject: [PATCH 03/12] switch to more up to date index

---
 app/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/config.py b/app/config.py
index 0684428a..bb89b043 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_dev")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")

From 6efca8eb71112ea89289d3138af1c835c2f428ab Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 15 Jun 2021 15:53:38 +1200
Subject: [PATCH 04/12] Switch back to stable endpoint

---
 app/config.py | 2 +-
 main.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/config.py b/app/config.py
index bb89b043..0684428a 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_dev")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
diff --git a/main.py b/main.py
index 475dcbd8..aa5c9d16 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,4 @@
 from app.main import app
 
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=8000)
+    app.run(host="0.0.0.0")

From 0a5872588fe995922bc507bd38fce8b52b52f08b Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Fri, 18 Jun 2021 16:21:07 +1200
Subject: [PATCH 05/12] Add endpoint for neuron queries

---
 app/main.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/app/main.py b/app/main.py
index f3f1f171..31a3abe3 100644
--- a/app/main.py
+++ b/app/main.py
@@ -225,6 +225,34 @@ def sci_doi(doi1,doi2):
         logging.error(err)
         return json.dumps({'error': err})
 
+
+# /scicrunch-organ-query/: Returns results for given organ curie. These can be processed by the sidebar
+@app.route("/scicrunch-organ-query/<curie>")
+def sci_organ(curie):
+    data = {
+        "size": 20,
+        "from": 0,
+        "query": {
+            "query_string": {
+                "fields": [
+                    "*organ.curie"
+                ],
+                "query": curie
+            }
+        }
+    }
+
+    try:
+        response = requests.post(
+            f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',
+            json=data)
+        return process_kb_results(response.json())
+    except requests.exceptions.HTTPError as err:
+        logging.error(err)
+        return json.dumps({'error': err})
+
+
+
 # /search/: Returns scicrunch results for a given <search> query
 @app.route("/search/", defaults={'query': ''})
 @app.route("/search/<query>")

From 61c87718d9d45c34f5a52b3570d63de319c9cd74 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 22 Jun 2021 17:32:38 +1200
Subject: [PATCH 06/12] Switch to dev endpoint

---
 app/config.py             |  2 +-
 app/main.py               | 17 +++++++++++++++++
 app/process_kb_results.py |  4 ++--
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/app/config.py b/app/config.py
index 0684428a..bb89b043 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_dev")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
diff --git a/app/main.py b/app/main.py
index 31a3abe3..f6843b4a 100644
--- a/app/main.py
+++ b/app/main.py
@@ -242,6 +242,23 @@ def sci_organ(curie):
         }
     }
 
+    # # /scicrunch-organ-query/: Returns results for given organ curie. These can be processed by the sidebar
+    # @app.route("/scicrunch-query-string/<field>/<curie>")
+    # def sci_organ(field, curie):
+    #     # field example: "*organ.curie"
+    #     data = {
+    #         "size": 20,
+    #         "from": 0,
+    #         "query": {
+    #             "query_string": {
+    #                 "fields": [
+    #                     field
+    #                 ],
+    #                 "query": curie
+    #             }
+    #         }
+    #     }
+
     try:
         response = requests.post(
             f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',
diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 44241a85..4f7bb7c1 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -173,13 +173,13 @@ def convert_url_to_doi(doi):
 def find_csv_files(obj_list):
     if not obj_list:
         return obj_list
-    return [obj for obj in obj_list if obj.get('mimetype', 'none') == 'text/csv']
+    return [obj for obj in obj_list if obj.get('mimetype', {}).get('name', 'none') == 'text/csv']
 
 
 def find_scaffold_json_files(obj_list):
     if not obj_list:
         return obj_list
-    return [obj for obj in obj_list if obj.get('mimetype', 'none') == 'application/json' and "metadata.json" in obj.get('dataset', 'none')['path']]
+    return [obj for obj in obj_list if obj.get('mimetype', {}).get('name', 'none') == 'application/json' and "metadata.json" in obj.get('dataset', 'none')['path']]
 
 
 # get_attributes: Use 'attributes' (defined at top of this document) to step through the large scicrunch result dict

From 17836d48c75e7dbad779c0a20b37a77aa3925c53 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 22 Jun 2021 17:39:26 +1200
Subject: [PATCH 07/12] Adjust query endpoint to be more flexible

---
 app/main.py | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/app/main.py b/app/main.py
index f6843b4a..b3381a51 100644
--- a/app/main.py
+++ b/app/main.py
@@ -226,39 +226,23 @@ def sci_doi(doi1,doi2):
         return json.dumps({'error': err})
 
 
-# /scicrunch-organ-query/: Returns results for given organ curie. These can be processed by the sidebar
-@app.route("/scicrunch-organ-query/<curie>")
-def sci_organ(curie):
+# /scicrunch-query-string/: Returns results for given organ curie. These can be processed by the sidebar
+@app.route("/scicrunch-query-string/<field>/<curie>")
+def sci_organ(field, curie):
+    # field example: "*organ.curie"
     data = {
         "size": 20,
         "from": 0,
         "query": {
             "query_string": {
                 "fields": [
-                    "*organ.curie"
+                    field
                 ],
                 "query": curie
             }
         }
     }
 
-    # # /scicrunch-organ-query/: Returns results for given organ curie. These can be processed by the sidebar
-    # @app.route("/scicrunch-query-string/<field>/<curie>")
-    # def sci_organ(field, curie):
-    #     # field example: "*organ.curie"
-    #     data = {
-    #         "size": 20,
-    #         "from": 0,
-    #         "query": {
-    #             "query_string": {
-    #                 "fields": [
-    #                     field
-    #                 ],
-    #                 "query": curie
-    #             }
-    #         }
-    #     }
-
     try:
         response = requests.post(
             f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',

From 10ce60f49fd4db85e74265f7476027c4ce5e2cb6 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 22 Jun 2021 18:58:05 +1200
Subject: [PATCH 08/12] Bug fix and query change

 - Bug fix for some datasets not returning _any_ objects
 - Switch scicrunch-query-string from using the route to using URL
   params (this is now implemented in tehsurfer/map-side-bar)
---
 app/main.py               | 10 +++++-----
 app/process_kb_results.py |  5 +++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/app/main.py b/app/main.py
index b3381a51..b73ff648 100644
--- a/app/main.py
+++ b/app/main.py
@@ -227,17 +227,17 @@ def sci_doi(doi1,doi2):
 
 
 # /scicrunch-query-string/: Returns results for given organ curie. These can be processed by the sidebar
-@app.route("/scicrunch-query-string/<field>/<curie>")
-def sci_organ(field, curie):
+@app.route("/scicrunch-query-string/")
+def sci_organ():
+    fields = request.args.getlist('field')
+    curie = request.args.get('curie')
     # field example: "*organ.curie"
     data = {
         "size": 20,
         "from": 0,
         "query": {
             "query_string": {
-                "fields": [
-                    field
-                ],
+                "fields": fields,
                 "query": curie
             }
         }
diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 4f7bb7c1..12bdb14b 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -153,8 +153,9 @@ def process_kb_results(results):
     for i, hit in enumerate(hits):
         attr = get_attributes(attributes, hit)
         attr['doi'] = convert_doi_to_url(attr['doi'])
-        attr['csvFiles'] = find_csv_files(attr['csvFiles'])
-        attr['scaffolds'] = find_scaffold_json_files(hit['_source']['objects'])
+        objects = find_csv_files(attr['csvFiles']) # Have to do this as not all datsets return objects
+        attr['csvFiles'] = objects
+        attr['scaffolds'] = find_scaffold_json_files(objects)
         output.append(attr)
     return json.dumps({'numberOfHits': results['hits']['total'], 'results': output})
 

From 300a85af2f9eb0be87a447a14dc157ac7c604b08 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 22 Jun 2021 19:12:57 +1200
Subject: [PATCH 09/12] Fix mistake in order of data processing

---
 app/process_kb_results.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 12bdb14b..7031d7f7 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -153,8 +153,8 @@ def process_kb_results(results):
     for i, hit in enumerate(hits):
         attr = get_attributes(attributes, hit)
         attr['doi'] = convert_doi_to_url(attr['doi'])
-        objects = find_csv_files(attr['csvFiles']) # Have to do this as not all datsets return objects
-        attr['csvFiles'] = objects
+        objects = attr['csvFiles']  # Have to do this as not all datasets return objects
+        attr['csvFiles'] = find_csv_files(objects)
         attr['scaffolds'] = find_scaffold_json_files(objects)
         output.append(attr)
     return json.dumps({'numberOfHits': results['hits']['total'], 'results': output})

From 2bc2d6a08c6b7a8a9af7aec4f9f638e3fbb01c3a Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Wed, 23 Jun 2021 17:47:16 +1200
Subject: [PATCH 10/12] Switch to using additional mimetypes as intended

---
 app/process_kb_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 7031d7f7..4a28f101 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -180,7 +180,7 @@ def find_csv_files(obj_list):
 def find_scaffold_json_files(obj_list):
     if not obj_list:
         return obj_list
-    return [obj for obj in obj_list if obj.get('mimetype', {}).get('name', 'none') == 'application/json' and "metadata.json" in obj.get('dataset', 'none')['path']]
+    return [obj for obj in obj_list if obj.get('additional_mimetype', {}).get('name', 'none') == 'inode/vnd.abi.scaffold+file']
 
 
 # get_attributes: Use 'attributes' (defined at top of this document) to step through the large scicrunch result dict

From 6fab9e09b4a5f86c85583043ebfdde7091450764 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Thu, 24 Jun 2021 14:10:54 +1200
Subject: [PATCH 11/12] Fix to hit processing

Previously there was no check to make sure we made it to the end of our
desired attributes. This has now been added along with some comments to
explain what is going on
---
 app/process_kb_results.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 4a28f101..abe7a1f6 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -190,11 +190,12 @@ def get_attributes(attributes, dataset):
     for k, attr in attributes.items():
         subset = dataset['_source'] # set our subest to the full dataset result
         key_attr = False
-        for key in attr:
+        for n, key in enumerate(attr): # step through attributes
             if isinstance(subset, dict):
-                if key in subset.keys():
+                if key in subset.keys(): # continue if keys are found
                     subset = subset[key]
-                    key_attr = subset
+                    if n+1 is len(attr): # if we made it to the end, save this subset
+                        key_attr = subset
         found_attr[k] = key_attr
     return found_attr
 

From 6f3702782aa689221bb07b519ec88c99fb294645 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Fri, 30 Jul 2021 11:26:14 +1200
Subject: [PATCH 12/12] Add Doi endpoint

---
 app/main.py               | 14 ++++++++++++++
 app/process_kb_results.py | 22 +++++++++++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/app/main.py b/app/main.py
index b73ff648..91e408b3 100644
--- a/app/main.py
+++ b/app/main.py
@@ -225,6 +225,20 @@ def sci_doi(doi1,doi2):
         logging.error(err)
         return json.dumps({'error': err})
 
+# /scicrunch-dataset-processed/: Returns processed scicrunch results for the dataset identified by <doi1>/<doi2>
+@app.route("/scicrunch-dataset-processed/<doi1>/<doi2>")
+def sci_doi_processed(doi1,doi2):
+    doi = doi1 + '/' + doi2
+    print(doi)
+    data = create_doi_request(doi)
+    try:
+        response = requests.post(
+            f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',
+            json=data)
+        return process_kb_results(response.json())
+    except requests.exceptions.HTTPError as err:
+        logging.error(err)
+        return json.dumps({'error': err})
 
 # /scicrunch-query-string/: Returns results for given organ curie. These can be processed by the sidebar
 @app.route("/scicrunch-query-string/")
diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index abe7a1f6..53094bbe 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -26,7 +26,7 @@ def create_doi_request(doi):
                 "should": [],
                 "filter": {
                     "term": {
-                        "_id": doi
+                        "_id": f'DOI:{doi}'
                     }
                 }
             }
@@ -42,7 +42,8 @@ def create_facet_query(type):
     type_map = {
         'species': ['organisms.primary.species.name.aggregate', 'organisms.sample.species.name.aggregate'],
         'gender': ['attributes.subject.sex.value'],
-        'genotype': ['anatomy.organ.name.aggregate']
+        'genotype': ['anatomy.organ.name.aggregate'],
+        'organ': ['anatomy.organ.name.aggregate']
     }
 
     data = {
@@ -86,7 +87,8 @@ def create_filter_request(query, terms, facets, size, start):
     type_map = {
         'species': ['organisms.primary.species.name.aggregate', 'organisms.sample.species.name'],
         'gender': ['attributes.subject.sex.value', 'attributes.sample.sex.value'],
-        'genotype': ['anatomy.organ.name.aggregate']
+        'genotype': ['anatomy.organ.name.aggregate'],
+        'organ': ['anatomy.organ.name.aggregate']
     }
 
     # Data structure of a scicrunch search
@@ -183,6 +185,20 @@ def find_scaffold_json_files(obj_list):
     return [obj for obj in obj_list if obj.get('additional_mimetype', {}).get('name', 'none') == 'inode/vnd.abi.scaffold+file']
 
 
+attributes = {
+    'scaffolds': ['scaffolds'],
+    'samples': ['attributes','sample','subject'],
+    'name': ['item','name'],
+    'identifier': ['item', 'identifier'],
+    'uri': ['distributions', 'current', 'uri'],
+    'updated': ['dates', 'updated'],
+    'organs': ['anatomy', 'organ'],
+    'contributors': ['contributors'],
+    'doi': ['item', 'curie'],
+    'csvFiles': ['objects']
+}
+
+
 # get_attributes: Use 'attributes' (defined at top of this document) to step through the large scicrunch result dict
 #  and cherrypick the attributes of interest
 def get_attributes(attributes, dataset):