Skip to content

Commit

Permalink
various updates
Browse files Browse the repository at this point in the history
  • Loading branch information
keelinm committed Nov 24, 2023
1 parent 5b0da24 commit 29ac2c6
Showing 1 changed file with 105 additions and 32 deletions.
137 changes: 105 additions & 32 deletions scripts/MergeChanges.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,19 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "d377a755",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\keeli\\anaconda3\\envs\\lit-env\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import pandas as pd\n",
"import os\n",
Expand All @@ -15,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "f6cf2984",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -47,44 +56,54 @@
" return df_bib_data"
]
},
{
"cell_type": "markdown",
"id": "5d1be2b7",
"metadata": {},
"source": [
"# Read in the LIVE bib file and the updated bib file we want to merge it with"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "5b5596e1",
"metadata": {},
"outputs": [],
"source": [
"path_diag_bib = os.path.join('..', 'diag.bib')\n",
"diag_bib_raw = read_bibfile(None, path_diag_bib)\n",
"bibfile = from_bib_to_csv(diag_bib_raw)\n",
"path_new_bib = os.path.join('script_data/', 'diag_ss_new.bib')\n",
"# convert the live bib file to csv format\n",
"bibfile_as_csv = from_bib_to_csv(diag_bib_raw)\n",
"path_new_bib = os.path.join('script_data/', 'diag_ss_tmp_new.bib')\n",
"updated_bib_raw = read_bibfile(None, path_new_bib)\n",
"newbibfile = from_bib_to_csv(updated_bib_raw)"
"# convert the updated bibfile to csv format\n",
"newbibfile_as_csv = from_bib_to_csv(updated_bib_raw)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "98489bba",
"metadata": {},
"outputs": [],
"source": [
"new_bibkeys = newbibfile['bibkey'].tolist()"
"new_bibkeys = newbibfile_as_csv['bibkey'].tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "2374dcc9",
"metadata": {},
"outputs": [],
"source": [
"old_bibkeys = bibfile['bibkey'].tolist()"
"old_bibkeys = bibfile_as_csv['bibkey'].tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "f38e2c4c",
"metadata": {},
"outputs": [],
Expand All @@ -98,12 +117,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"id": "366debbe",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"\"KM: I propose that the updated bib file is the one we will keep and we only need to check if it misses anything that is in the original\\nfor entry in diag_bib_raw:\\n if entry.type == 'string':\\n continue\\n bibkey = entry.key\\n if bibkey in new_bibkeys:\\n corresponding_entry = get_entry(updated_bib_raw, bibkey)\\n if 'all_ss_ids' in corresponding_entry.fields:\\n entry.fields['all_ss_ids'] = corresponding_entry.fields['all_ss_ids']\\n if 'pmid' in corresponding_entry.fields:\\n entry.fields['pmid'] = corresponding_entry.fields['pmid']\\n\""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Update existing bibitems\n",
"\"\"\"KM: I propose that the updated bib file is the one we will keep and we only need to check if it misses anything that is in the original\n",
"for entry in diag_bib_raw:\n",
" if entry.type == 'string':\n",
" continue\n",
Expand All @@ -113,41 +144,75 @@
" if 'all_ss_ids' in corresponding_entry.fields:\n",
" entry.fields['all_ss_ids'] = corresponding_entry.fields['all_ss_ids']\n",
" if 'pmid' in corresponding_entry.fields:\n",
" entry.fields['pmid'] = corresponding_entry.fields['pmid']"
" entry.fields['pmid'] = corresponding_entry.fields['pmid']\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"id": "550580b1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'Brou23', 'Twil23c', 'Hump23', 'Anto23a', 'Aswo23', 'Hend23b'}\n"
]
}
],
"source": [
"bibkeys_not_in_updated = set(old_bibkeys)-set(new_bibkeys)\n",
"print(bibkeys_not_in_updated)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "21522e57",
"metadata": {},
"outputs": [],
"source": [
"bibkeys_toadd = set(new_bibkeys)-set(old_bibkeys)\n",
"df_to_add = newbibfile[newbibfile['bibkey'].isin(bibkeys_toadd)]"
"# bibkeys_toadd = set(new_bibkeys)-set(old_bibkeys)\n",
"# df_to_add = newbibfile[newbibfile['bibkey'].isin(bibkeys_toadd)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"id": "3c023620",
"metadata": {},
"outputs": [],
"source": [
"bibkeys_toadd"
"# bibkeys_toadd"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"id": "82d6aeeb",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"appending entry Twil23c\n",
"appending entry Hend23b\n",
"appending entry Anto23a\n",
"appending entry Brou23\n",
"appending entry Hump23\n",
"appending entry Aswo23\n"
]
}
],
"source": [
"for entry in updated_bib_raw:\n",
"for entry in diag_bib_raw:\n",
" bibkey = entry.key\n",
" if bibkey in bibkeys_toadd:\n",
" diag_bib_raw.append(entry)"
" if bibkey in bibkeys_not_in_updated:\n",
" print('appending entry', bibkey)\n",
" updated_bib_raw.append(entry)"
]
},
{
Expand All @@ -157,26 +222,34 @@
"metadata": {},
"outputs": [],
"source": [
"csv=from_bib_to_csv(diag_bib_raw)"
"#csv=from_bib_to_csv(diag_bib_raw)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"id": "cf941ddf",
"metadata": {},
"outputs": [],
"source": [
"path_diag_bib = os.path.join('..', 'diag_latest_try.bib')\n",
"save_to_file(diag_bib_raw, path_diag_bib)"
"path_diag_bib = os.path.join('..', 'diag_orig_and_ss_merged.bib')\n",
"save_to_file(updated_bib_raw, path_diag_bib)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d491d30",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "lit-env",
"language": "python",
"name": "python3"
"name": "lit-env"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -188,7 +261,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.9.18"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 29ac2c6

Please sign in to comment.