diff --git a/README.md b/README.md
index 424d2817..b306f00b 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,6 @@ Make sure you have python 3 installed `python3 --version`
 python3 -m venv ./venv
 . ./venv/bin/activate
 pip install -r requirements.txt
-pip install -r requirements-dev.txt
 gunicorn main:app
 ```
 
@@ -24,6 +23,7 @@ If you do not have the NIH SPARC portal user environment variables setup already
 After the previous steps or if you already have those environment variables setup, run:
 
 ```
+export PYTHONPATH=`pwd`
 pip install -r requirements-dev.txt
 pytest
 ```
diff --git a/app/config.py b/app/config.py
index 4b2fa5cb..adf2a509 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_Datasets_pr")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
@@ -42,4 +42,7 @@ class Config(object):
     CCB_HEAD_WRIKE_ID = os.environ.get("CCB_HEAD_WRIKE_ID")
     MODERATOR_WRIKE_ID = os.environ.get("MODERATOR_WRIKE_ID")
     MAILCHIMP_API_KEY = os.environ.get("MAILCHIMP_API_KEY")
+    OSPARC_API_URL = os.environ.get("OSPARC_API_URL", "https://api.osparc.io")
+    OSPARC_API_KEY = os.environ.get("OSPARC_API_KEY")
+    OSPARC_API_SECRET = os.environ.get("OSPARC_API_SECRET")
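The three new oSPARC settings follow the same `os.environ.get` pattern as the rest of `Config`. A quick way to confirm they are picked up — a sketch only; the key and secret values below are placeholders, real ones come from an oSPARC account:

```
import os

# Placeholder credentials for illustration only; set real values in the
# environment rather than committing them anywhere.
os.environ.setdefault("OSPARC_API_KEY", "test-key")
os.environ.setdefault("OSPARC_API_SECRET", "test-secret")

from app.config import Config

print(Config.OSPARC_API_URL)   # https://api.osparc.io unless overridden
print(Config.OSPARC_API_KEY)   # test-key
```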
diff --git a/app/main.py b/app/main.py
index 178ffd67..c09518d1 100644
--- a/app/main.py
+++ b/app/main.py
@@ -24,6 +24,9 @@ from requests.auth import HTTPBasicAuth
 
 import os
 
+import app.osparc as osparc
+import requests
+
 # from pymongo import MongoClient
 
 app = Flask(__name__)
@@ -210,6 +213,59 @@ def direct_download_url(path):
     resource = response["Body"].read()
     return resource
 
+# /scicrunch-dataset/<doi1>/<doi2>: Returns the SciCrunch dataset matching a given DOI
+@app.route("/scicrunch-dataset/<doi1>/<doi2>")
+def sci_doi(doi1, doi2):
+    doi = doi1 + '/' + doi2
+    data = create_doi_request(doi)
+    try:
+        response = requests.post(
+            f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',
+            json=data)
+        return response.json()
+    except requests.exceptions.HTTPError as err:
+        logging.error(err)
+        return json.dumps({'error': str(err)})
+
+# /pubmed/<id>: Used as a proxy for making requests to PubMed
+@app.route("/pubmed/<id>")
+@app.route("/pubmed/<id>/")
+def pubmed(id):
+    try:
+        response = requests.get(f'https://pubmed.ncbi.nlm.nih.gov/{id}/')
+        return response.text
+    except requests.exceptions.HTTPError as err:
+        logging.error(err)
+        return json.dumps({'error': str(err)})
+
+# /scicrunch-query-string/: Returns results for a given organ curie. These can be processed by the sidebar.
+@app.route("/scicrunch-query-string/")
+def sci_organ():
+    fields = request.args.getlist('field')
+    curie = request.args.get('curie')
+    # field example: "*organ.curie"
+    data = {
+        "size": 20,
+        "from": 0,
+        "query": {
+            "query_string": {
+                "fields": fields,
+                "query": curie
+            }
+        }
+    }
+
+    try:
+        response = requests.post(
+            f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',
+            json=data)
+        return process_kb_results(response.json())
+    except requests.exceptions.HTTPError as err:
+        logging.error(err)
+        return json.dumps({'error': str(err)})
+
+
 # /search/: Returns scicrunch results for a given query
 @app.route("/search/", defaults={'query': ''})
@@ -243,9 +299,9 @@ def filter_search(query):
         results = process_kb_results(response.json())
     except requests.exceptions.HTTPError as err:
         logging.error(err)
-        return jsonify({'error': str(err), 'message': 'Scicrunch is not currently reachable, please try again later'}), 502
+        return jsonify({'error': str(err), 'message': 'SciCrunch is not currently reachable, please try again later'}), 502
     except json.JSONDecodeError as e:
-        return jsonify({'message': 'Could not parse Scicrunch output, please try again later',
+        return jsonify({'message': 'Could not parse SciCrunch output, please try again later',
                         'error': 'JSONDecodeError'}), 502
     return results
@@ -268,7 +324,7 @@ def get_facets(type):
             json_result = response.json()
             results.append(json_result)
     except BaseException as e:
-        return jsonify({'message': 'Could not parse Scicrunch output, please try again later',
+        return jsonify({'message': 'Could not parse SciCrunch output, please try again later',
                         'error': 'JSONDecodeError'}), 502
 
     # Select terms from the results
@@ -565,3 +621,39 @@ def subscribe_to_mailchimp():
         return resp.json()
     else:
         abort(400, description="Missing email_address, first_name or last_name")
+
+
+@app.route("/simulation", methods=["POST"])
+def simulation():
+    data = request.get_json()
+
+    if data and "model_url" in data and "json_config" in data:
+        return json.dumps(osparc.run_simulation(data["model_url"], data["json_config"]))
+    else:
+        abort(400, description="Missing model URL and/or JSON configuration")
+
+
+@app.route("/pmr_latest_exposure", methods=["POST"])
+def pmr_latest_exposure():
+    data = request.get_json()
+
+    if data and "workspace_url" in data:
+        try:
+            resp = requests.get(data["workspace_url"],
+                                headers={"Accept": "application/vnd.physiome.pmr2.json.1"})
+            if resp.status_code == 200:
+                try:
+                    # Return the latest exposure for the given workspace.
+                    url = resp.json()["collection"]["items"][0]["links"][0]["href"]
+                except Exception:
+                    # There is no latest exposure for the given workspace.
+                    url = ""
+                return jsonify(
+                    url=url
+                )
+            else:
+                return resp.json()
+        except Exception:
+            abort(400, description="Invalid workspace URL")
+    else:
+        abort(400, description="Missing workspace URL")
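For context on `pmr_latest_exposure`: PMR serves workspace metadata in a collection+json flavour when asked for it via the `Accept` header, and the latest exposure, when one exists, is the first link of the first item. A standalone sketch using the workspace URL from the test suite:

```
import requests

# Ask PMR for the workspace metadata; when an exposure exists it is the
# first item's first link in the returned collection.
resp = requests.get(
    "https://models.physiomeproject.org/workspace/486",
    headers={"Accept": "application/vnd.physiome.pmr2.json.1"},
)
items = resp.json()["collection"]["items"]
url = items[0]["links"][0]["href"] if items else ""
print(url)  # https://models.physiomeproject.org/e/611
```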
diff --git a/app/osparc.py b/app/osparc.py
new file mode 100644
index 00000000..9658ae1c
--- /dev/null
+++ b/app/osparc.py
@@ -0,0 +1,103 @@
+from app.config import Config
+import json
+import osparc
+import tempfile
+from time import sleep
+
+
+def run_simulation(model_url, json_config):
+    temp_config_file = tempfile.NamedTemporaryFile(mode="w+")
+
+    json.dump(json_config, temp_config_file)
+
+    temp_config_file.seek(0)
+
+    try:
+        api_client = osparc.ApiClient(osparc.Configuration(
+            host=Config.OSPARC_API_URL,
+            username=Config.OSPARC_API_KEY,
+            password=Config.OSPARC_API_SECRET
+        ))
+
+        # Upload the configuration file.
+
+        files_api = osparc.FilesApi(api_client)
+
+        try:
+            config_file = files_api.upload_file(temp_config_file.name)
+        except Exception:
+            raise Exception(
+                "the simulation configuration file could not be uploaded")
+
+        # Create the simulation.
+
+        solvers_api = osparc.SolversApi(api_client)
+
+        solver = solvers_api.get_solver_release(
+            "simcore/services/comp/opencor", "1.0.3")
+
+        job = solvers_api.create_job(
+            solver.id,
+            solver.version,
+            osparc.JobInputs({
+                "model_url": model_url,
+                "config_file": config_file
+            })
+        )
+
+        # Start the simulation job.
+
+        status = solvers_api.start_job(solver.id, solver.version, job.id)
+
+        if status.state != "PUBLISHED":
+            raise Exception("the simulation job could not be submitted")
+
+        # Wait for the simulation job to complete (or to fail).
+
+        while True:
+            status = solvers_api.inspect_job(solver.id, solver.version, job.id)
+
+            if status.progress == 100:
+                break
+
+            sleep(1)
+
+        status = solvers_api.inspect_job(solver.id, solver.version, job.id)
+
+        if status.state != "SUCCESS":
+            raise Exception("the simulation failed")
+
+        # Retrieve the simulation job outputs.
+
+        try:
+            outputs = solvers_api.get_job_outputs(
+                solver.id, solver.version, job.id)
+        except Exception:
+            raise Exception(
+                "the simulation job outputs could not be retrieved")
+
+        # Download the simulation results.
+
+        try:
+            results_filename = files_api.download_file(
+                outputs.results["output_1"].id)
+        except Exception:
+            raise Exception("the simulation results could not be retrieved")
+
+        with open(results_filename, "r") as results_file:
+            res = {
+                "status": "ok",
+                "results": json.load(results_file)
+            }
+    except Exception as e:
+        res = {
+            "status": "nok",
+            "description": e.args[0] if len(e.args) > 0 else "unknown"
+        }
+
+    temp_config_file.close()
+
+    return res
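`run_simulation` can also be exercised directly, outside Flask. A sketch assuming valid oSPARC credentials are configured in the environment, with the model URL and configuration taken from the test suite:

```
from app.osparc import run_simulation

# Assumes OSPARC_API_KEY / OSPARC_API_SECRET are set in the environment.
res = run_simulation(
    "https://models.physiomeproject.org/e/611/HumanSAN_Fabbri_Fantini_Wilders_Severi_2017.cellml",
    {
        "simulation": {"Ending point": 0.003, "Point interval": 0.001},
        "output": ["Membrane/V"],
    },
)
print(res["status"])  # "ok", with res["results"] holding the output curves
```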
diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index f7a8616e..4ea1f4fe 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -5,6 +5,7 @@
 #  samples: ['attributes','sample','subject'] will find and enter dict keys in the following order:
 #  attributes > sample > subject
 attributes = {
+    'additionalLinks': ['xrefs', 'additionalLinks'],
     'scaffolds': ['scaffolds'],
     'samples': ['attributes','sample','subject'],
     'name': ['item','name'],
@@ -14,13 +15,31 @@
     'organs': ['anatomy', 'organ'],
     'contributors': ['contributors'],
     'doi': ['item', 'curie'],
-    'csvFiles': ['objects']
+    'csvFiles': ['objects'],
+    'pennsieve': ['pennsieve']
 }
 
+
+def create_doi_request(doi):
+    query = {
+        "query": {
+            "bool": {
+                "must": [{"match_all": {}}],
+                "should": [],
+                "filter": {
+                    "term": {
+                        "_id": doi
+                    }
+                }
+            }
+        }
+    }
+
+    return query
+
+
 # create_facet_query(type): Generates facet search request data for scicrunch given a 'type'; where
-# 'type' is either 'species', 'gender', or 'genotype' at this stage.
+# 'type' is either 'species', 'gender', or 'organ' at this stage.
 # Returns a tuple of the type map and request data ( type_map, data )
 def create_facet_query(type):
     type_map = {
@@ -64,7 +83,7 @@ def create_filter_request(query, terms, facets, size, start):
     if start is None:
         start = 0
 
-    if query is "" and len(terms) is 0 and len(facets) is 0:
+    if not query and not terms and not facets:
         return {"size": size, "from": start}
 
     # Type map is used to map scicrunch paths to given facet
@@ -113,24 +132,38 @@ def facet_query_string(query, terms, facets, type_map):
 
     # Add search query if it exists
     qt = ""
-    if query is not "":
+    if query:
         qt = f'({query})'
 
-    if query is not "" and len(t) > 0:
+    if query and t:
         qt += " AND "
 
     # Add the brackets and OR and AND parameters
     for k in t:
-        qt += type_map[k][0] + ":("  # facet term path and opening bracket
-        for l in t[k]:
-            qt += f"({l})"  # bracket around terms incase there are spaces
-            if l is not t[k][-1]:
-                qt += " OR "  # 'OR' if more terms in this facet are coming
-            else:
-                qt += ") "
+        if k == "datasets":
+            needParentheses = (qt or len(t) > 1) and (len(t[k]) > 1)
+            if needParentheses:
+                qt += "("
+            for l in t[k]:
+                if l == "scaffolds":
+                    qt += "objects.additional_mimetype.name:((inode%2fvnd.abi.scaffold) AND (file))"
+                elif l == "simulations":
+                    qt += "xrefs.additionalLinks.description:((CellML) OR (SED-ML))"
+                if l != t[k][-1]:
+                    qt += " OR "  # 'OR' if more terms in this facet are coming
+            if needParentheses:
+                qt += ")"
+        else:
+            qt += type_map[k][0] + ":("  # facet term path and opening bracket
+            for l in t[k]:
+                qt += f"({l})"  # brackets around terms in case there are spaces
+                if l != t[k][-1]:
+                    qt += " OR "  # 'OR' if more terms in this facet are coming
+                else:
+                    qt += ")"
         if k is not list(t.keys())[-1]:  # Add 'AND' if we are not at the last item
             qt += " AND "
     return qt
@@ -141,7 +174,9 @@ def process_kb_results(results):
     for i, hit in enumerate(hits):
         attr = get_attributes(attributes, hit)
         attr['doi'] = convert_doi_to_url(attr['doi'])
-        attr['csvFiles'] = find_csv_files(attr['csvFiles'])
+        objects = attr['csvFiles']  # Have to do this as not all datasets return objects
+        attr['csvFiles'] = find_csv_files(objects)
+        attr['scaffolds'] = find_scaffold_json_files(objects)
         output.append(attr)
     return json.dumps({'numberOfHits': results['hits']['total'], 'results': output})
 
@@ -151,11 +186,22 @@ def convert_doi_to_url(doi):
         return doi
     return doi.replace('DOI:', 'https://doi.org/')
 
+
+def convert_url_to_doi(doi):
+    if not doi:
+        return doi
+    return doi.replace('https://doi.org/', 'DOI:')
+
 
 def find_csv_files(obj_list):
     if not obj_list:
         return obj_list
-    return [obj for obj in obj_list if obj.get('mimetype', 'none') == 'text/csv']
+    return [obj for obj in obj_list if obj.get('mimetype', {}).get('name', 'none') == 'text/csv']
+
+
+def find_scaffold_json_files(obj_list):
+    if not obj_list:
+        return obj_list
+    return [obj for obj in obj_list if obj.get('additional_mimetype', {}).get('name', 'none') == 'inode/vnd.abi.scaffold+file']
 
 
 # get_attributes: Use 'attributes' (defined at top of this document) to step through the large scicrunch result dict
@@ -165,11 +211,12 @@ def get_attributes(attributes, dataset):
     for k, attr in attributes.items():
         subset = dataset['_source']  # set our subset to the full dataset result
         key_attr = False
-        for key in attr:
+        for n, key in enumerate(attr):  # step through attributes
             if isinstance(subset, dict):
-                if key in subset.keys():
+                if key in subset.keys():  # continue if keys are found
                     subset = subset[key]
-                    key_attr = subset
+                    if n + 1 == len(attr):  # if we made it to the end, save this subset
+                        key_attr = subset
         found_attr[k] = key_attr
 
     return found_attr
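For reference, `create_doi_request` wraps a `match_all` query in a `bool` filter on `_id`, so SciCrunch returns exactly the dataset indexed under the given DOI curie:

```
from app.process_kb_results import create_doi_request

print(create_doi_request("DOI:10.26275/pzek-91wx"))
# {'query': {'bool': {'must': [{'match_all': {}}], 'should': [],
#            'filter': {'term': {'_id': 'DOI:10.26275/pzek-91wx'}}}}}
```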
diff --git a/requirements.txt b/requirements.txt
index 44368981..d3d9d3d3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,10 @@
 api==0.0.7
 pennsieve==6.1.1
-boto3==1.10.28
-botocore==1.13.34
+boto3==1.17.67
+botocore==1.20.67
 certifi==2019.11.28
 chardet==3.0.4
-Click==7.0
+Click==7.1.2
 docutils==0.15.2
 Flask==1.1.1
 flask-marshmallow==0.10.1
@@ -17,16 +17,17 @@ jmespath==0.9.4
 MarkupSafe==1.1.1
 marshmallow==3.2.2
 nose==1.3.7
+osparc==0.4.3
 public==2019.4.13
 pymongo==3.8.0
 python-dateutil==2.8.0
 python-dotenv==0.10.3
 query-string==2019.4.13
-requests==2.22.0
-s3transfer==0.2.1
+requests==2.25.1
+s3transfer==0.4.2
 six==1.13.0
 SQLAlchemy==1.3.20
-urllib3==1.25.7
+urllib3==1.26.4
 Werkzeug==0.16.0
 psycopg2-binary==2.8.6
 APScheduler==3.7.0
diff --git a/tests/test_osparc.py b/tests/test_osparc.py
new file mode 100644
index 00000000..99138b75
--- /dev/null
+++ b/tests/test_osparc.py
@@ -0,0 +1,84 @@
+import json
+import pytest
+from app import app
+
+
+@pytest.fixture
+def client():
+    app.config['TESTING'] = True
+    return app.test_client()
+
+
+def test_osparc_no_post(client):
+    r = client.get('/simulation')
+    assert r.status_code == 405
+
+
+def test_osparc_empty_post(client):
+    r = client.post("/simulation", json={})
+    assert r.status_code == 400
+
+
+def test_osparc_no_json_config(client):
+    data = {
+        "model_url": "https://models.physiomeproject.org/e/611/HumanSAN_Fabbri_Fantini_Wilders_Severi_2017.cellml"
+    }
+    r = client.post("/simulation", json=data)
+    assert r.status_code == 400
+
+
+def test_osparc_no_model_url(client):
+    data = {
+        "json_config": {
+            "simulation": {
+                "Ending point": 0.003,
+                "Point interval": 0.001,
+            },
+            "output": ["Membrane/V"]
+        }
+    }
+    r = client.post("/simulation", json=data)
+    assert r.status_code == 400
+
+
+def test_osparc_valid_data(client):
+    data = {
+        "model_url": "https://models.physiomeproject.org/e/611/HumanSAN_Fabbri_Fantini_Wilders_Severi_2017.cellml",
+        "json_config": {
+            "simulation": {
+                "Ending point": 0.003,
+                "Point interval": 0.001,
+            },
+            "output": ["Membrane/V"]
+        }
+    }
+    res = {
+        "status": "ok",
+        "results": {
+            "environment/time": [0.0, 0.001, 0.002, 0.003],
+            "Membrane/V": [-47.787168, -47.74547155339473, -47.72515226841376, -47.71370033208329]
+        }
+    }
+    r = client.post("/simulation", json=data)
+    assert r.status_code == 200
+    assert json.dumps(json.loads(r.data), sort_keys=True) == json.dumps(res, sort_keys=True)
+
+
+def test_osparc_failing_simulation(client):
+    data = {
+        "model_url": "https://models.physiomeproject.org/e/611/HumanSAN_Fabbri_Fantini_Wilders_Severi_2017.cellml",
+        "json_config": {
+            "simulation": {
+                "Ending point": 3.0,
+                "Point interval": 1.0,
+            },
+            "output": ["Membrane/V"]
+        }
+    }
+    res = {
+        "status": "nok",
+        "description": "the simulation failed"
+    }
+    r = client.post("/simulation", json=data)
+    assert r.status_code == 200
+    assert json.dumps(json.loads(r.data), sort_keys=True) == json.dumps(res, sort_keys=True)
diff --git a/tests/test_pmr.py b/tests/test_pmr.py
new file mode 100644
index 00000000..081bac8d
--- /dev/null
+++ b/tests/test_pmr.py
@@ -0,0 +1,39 @@
+import json
+import pytest
+from app import app
+
+
+@pytest.fixture
+def client():
+    app.config['TESTING'] = True
+    return app.test_client()
+
+
+def test_pmr_latest_exposure_no_post(client):
+    r = client.get('/pmr_latest_exposure')
+    assert r.status_code == 405
+
+
+def test_pmr_latest_exposure_empty_post(client):
+    r = client.post("/pmr_latest_exposure", json={})
+    assert r.status_code == 400
+
+
+def test_pmr_latest_exposure_workspace_with_latest_exposure(client):
+    r = client.post("/pmr_latest_exposure", json={"workspace_url": "https://models.physiomeproject.org/workspace/486"})
+    assert r.status_code == 200
+    data = r.get_json()
+    assert data["url"] == "https://models.physiomeproject.org/e/611"
+
+
+def test_pmr_latest_exposure_workspace_without_latest_exposure(client):
+    r = client.post("/pmr_latest_exposure", json={"workspace_url": "https://models.physiomeproject.org/workspace/698"})
+    assert r.status_code == 200
+    data = r.get_json()
+    assert data["url"] == ""
+
+
+def test_pmr_latest_exposure_workspace_with_invalid_workspace_url(client):
+    r = client.post("/pmr_latest_exposure", json={"workspace_url": "https://some.url.com/"})
+    assert r.status_code == 400
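The `/simulation` endpoint can equally be called against a running instance; a sketch assuming a local `gunicorn main:app` server on its default port, with the payload mirroring `tests/test_osparc.py`:

```
import requests

r = requests.post("http://localhost:8000/simulation", json={
    "model_url": "https://models.physiomeproject.org/e/611/HumanSAN_Fabbri_Fantini_Wilders_Severi_2017.cellml",
    "json_config": {
        "simulation": {"Ending point": 0.003, "Point interval": 0.001},
        "output": ["Membrane/V"],
    },
})
print(r.json()["status"])  # "ok" on success, "nok" plus a description on failure
```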
diff --git a/tests/test_scicrunch.py b/tests/test_scicrunch.py
index bdc650e0..1f2b9029 100644
--- a/tests/test_scicrunch.py
+++ b/tests/test_scicrunch.py
@@ -13,6 +13,11 @@ def test_scicrunch_keys(client):
     assert r.status_code == 200
     assert 'numberOfHits' in json.loads(r.data).keys()
 
+def test_scicrunch_dataset_doi(client):
+    r = client.get('/scicrunch-dataset/DOI%3A10.26275%2Fpzek-91wx')
+    assert json.loads(r.data)['hits']['hits'][0]['_id'] == "DOI:10.26275/pzek-91wx"
+
+
 def test_scicrunch_search(client):
     r = client.get('/search/heart')
     assert r.status_code == 200
@@ -23,9 +28,17 @@ def test_scicrunch_all_data(client):
     assert json.loads(r.data)['numberOfHits'] > 40
 
 def test_scicrunch_filter(client):
-    r = client.get('/filter-search/', query_string={'term': 'genotype', 'facet': 'heart'})
+    r = client.get('/filter-search/', query_string={'term': 'organ', 'facet': 'heart'})
     assert json.loads(r.data)['numberOfHits'] > 4
 
+def test_scicrunch_filter_scaffolds(client):
+    r = client.get('/filter-search/?facet=scaffolds&term=datasets')
+    assert json.loads(r.data)['numberOfHits'] > 10
+
+def test_scicrunch_filter_simulations(client):
+    r = client.get('/filter-search/?facet=simulations&term=datasets')
+    assert json.loads(r.data)['numberOfHits'] > 0
+
 def test_scicrunch_basic_search(client):
     r = client.get('/filter-search/Heart/?facet=All+Species&term=species')
     assert json.loads(r.data)['numberOfHits'] > 10
@@ -39,7 +52,18 @@ def test_scicrunch_combined_facet_text(client):
     assert json.loads(r.data)['numberOfHits'] > 1
 
 def test_getting_facets(client):
-    r = client.get('/get-facets/genotype')
+    r = client.get('/get-facets/organ')
     facet_results = json.loads(r.data)
     facets = [facet_result['key'] for facet_result in facet_results]
     assert 'heart' in facets
+
+def test_scaffold_files(client):
+    r = client.get('/filter-search/?facet=scaffolds&term=datasets&size=40')
+    results = json.loads(r.data)
+    assert results['numberOfHits'] > 0
+    for item in results['results']:
+        uri = item['pennsieve']['uri']
+        path = item['scaffolds'][0]['dataset']['path']
+        key = f"{uri}files/{path}".replace('s3://pennsieve-prod-discover-publish-use1/', '')
+        r = client.get(f"/s3-resource/{key}")
+        assert r.status_code == 200
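Likewise for the new DOI lookup, mirroring `test_scicrunch_dataset_doi` (a sketch against a local server): the DOI's slash is percent-encoded so that it reassembles from the two route segments `<doi1>/<doi2>`:

```
import requests

r = requests.get(
    "http://localhost:8000/scicrunch-dataset/DOI%3A10.26275%2Fpzek-91wx")
print(r.json()["hits"]["hits"][0]["_id"])  # DOI:10.26275/pzek-91wx
```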