-
-
Notifications
You must be signed in to change notification settings - Fork 2.9k
First part of the implementation of a Europe PMC fetcher #13389
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
c307eb4
First part of the implementation of a Europe PMC fetcher
Siedlerchr 62d9a2d
fix checkstyle and var name
Siedlerchr 7dbe91e
fix checkstyle and var name
Siedlerchr 674a2cf
Merge remote-tracking branch 'upstream/main' into euromedline
Siedlerchr 4b097a5
add pages cleanup and add changelog entry
Siedlerchr 8df1f05
remove comment
Siedlerchr File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
151 changes: 151 additions & 0 deletions
151
jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
package org.jabref.logic.importer.fetcher; | ||
|
||
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
|
||
import org.jabref.logic.cleanup.FieldFormatterCleanup; | ||
import org.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter; | ||
import org.jabref.logic.importer.IdBasedParserFetcher; | ||
import org.jabref.logic.importer.ParseException; | ||
import org.jabref.logic.importer.Parser; | ||
import org.jabref.logic.importer.util.JsonReader; | ||
import org.jabref.model.entry.Author; | ||
import org.jabref.model.entry.AuthorList; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.Month; | ||
import org.jabref.model.entry.field.StandardField; | ||
import org.jabref.model.entry.field.UnknownField; | ||
import org.jabref.model.entry.types.EntryType; | ||
import org.jabref.model.entry.types.StandardEntryType; | ||
|
||
import kong.unirest.core.json.JSONArray; | ||
import kong.unirest.core.json.JSONException; | ||
import kong.unirest.core.json.JSONObject; | ||
|
||
public class EuropePmcFetcher implements IdBasedParserFetcher { | ||
|
||
@Override | ||
public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException { | ||
return URI.create("https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=" + identifier + "&resultType=core&format=json").toURL(); | ||
} | ||
|
||
@Override | ||
public Parser getParser() { | ||
return inputStream -> { | ||
JSONObject response = JsonReader.toJsonObject(inputStream); | ||
if (response.isEmpty()) { | ||
return List.of(); | ||
} | ||
return List.of(jsonItemToBibEntry(response)); | ||
}; | ||
} | ||
|
||
private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException { | ||
try { | ||
JSONObject result = item.getJSONObject("resultList").getJSONArray("result").getJSONObject(0); | ||
|
||
System.out.println(result.toString(2)); | ||
|
||
EntryType entryType = StandardEntryType.Article; | ||
if (result.has("pubTypeList")) { | ||
for (Object o : result.getJSONObject("pubTypeList").getJSONArray("pubType")) { | ||
if ("letter".equalsIgnoreCase(o.toString())) { | ||
entryType = StandardEntryType.Article; | ||
break; | ||
// TODO: handle other types e.g. books | ||
} | ||
} | ||
} | ||
|
||
BibEntry entry = new BibEntry(entryType); | ||
|
||
entry.setField(StandardField.TITLE, result.optString("title")); | ||
entry.setField(StandardField.ABSTRACT, result.optString("abstractText")); | ||
|
||
entry.setField(StandardField.YEAR, result.optString("pubYear")); | ||
entry.setField(StandardField.VOLUME, result.optString("journalVolume")); | ||
entry.setField(StandardField.ISSUE, result.optString("journalIssue")); | ||
|
||
String pages = result.optString("pageInfo"); | ||
entry.setField(StandardField.PAGES, pages); | ||
|
||
entry.setField(StandardField.DOI, result.optString("doi")); | ||
entry.setField(StandardField.PMID, result.optString("pmid")); | ||
|
||
// Handle URL | ||
if (result.has("pmid")) { | ||
entry.setField(StandardField.URL, "https://pubmed.ncbi.nlm.nih.gov/" + result.getString("pmid") + "/"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Interesting - Normally our maintable should handle this similar to DOI handling. OK for now, noted down for future work 😋 |
||
} | ||
|
||
if (result.has("journalInfo") && result.getJSONObject("journalInfo").has("issn")) { | ||
entry.setField(StandardField.ISSN, result.getJSONObject("journalInfo").getString("issn")); | ||
} | ||
|
||
// Handle authors | ||
if (result.has("authorList") && result.getJSONObject("authorList").has("author")) { | ||
JSONArray authors = result.getJSONObject("authorList").getJSONArray("author"); | ||
|
||
List<Author> authorList = new ArrayList<>(); | ||
|
||
for (int i = 0; i < authors.length(); i++) { | ||
JSONObject author = authors.getJSONObject(i); | ||
|
||
String lastName = author.optString("lastName", ""); | ||
String firstName = author.optString("firstName", ""); | ||
authorList.add(new Author(firstName, "", "", lastName, "")); | ||
|
||
entry.setField(StandardField.AUTHOR, AuthorList.of(authorList).getAsLastFirstNamesWithAnd(false)); | ||
} | ||
} | ||
|
||
if (result.has("pubTypeList") && result.getJSONObject("pubTypeList").has("pubType")) { | ||
JSONArray pubTypes = result.getJSONObject("pubTypeList").getJSONArray("pubType"); | ||
if (!pubTypes.isEmpty()) { | ||
entry.setField(StandardField.PUBSTATE, pubTypes.getString(0)); | ||
} | ||
} | ||
|
||
if (result.has("pubModel")) { | ||
Optional.ofNullable(result.optString("pubModel")).ifPresent(pubModel -> entry.setField(StandardField.HOWPUBLISHED, pubModel)); | ||
} | ||
if (result.has("publicationStatus")) { | ||
Optional.ofNullable(result.optString("publicationStatus")).ifPresent(pubStatus -> entry.setField(StandardField.PUBSTATE, pubStatus)); | ||
} | ||
|
||
if (result.has("journalInfo")) { | ||
JSONObject journalInfo = result.getJSONObject("journalInfo"); | ||
Optional.ofNullable(journalInfo.optString("issue")).ifPresent(issue -> entry.setField(StandardField.ISSUE, issue)); | ||
Optional.ofNullable(journalInfo.optString("volume")).ifPresent(volume -> entry.setField(StandardField.VOLUME, volume)); | ||
Optional.of(journalInfo.optInt("yearOfPublication")).ifPresent(year -> entry.setField(StandardField.YEAR, year.toString())); | ||
Optional.of(journalInfo.optInt("monthOfPublication")) | ||
.flatMap(month -> Month.parse(month.toString())) | ||
.ifPresent(parsedMonth -> entry.setField(StandardField.MONTH, parsedMonth.getJabRefFormat())); | ||
if (journalInfo.has("journal")) { | ||
JSONObject journal = journalInfo.getJSONObject("journal"); | ||
Optional.ofNullable(journal.optString("title")).ifPresent(title -> entry.setField(StandardField.JOURNAL, title)); | ||
Optional.ofNullable(journal.optString("nlmid")).ifPresent(nlmid -> entry.setField(new UnknownField("nlmid"), nlmid)); | ||
Optional.ofNullable(journal.optString("issn")).ifPresent(issn -> entry.setField(StandardField.ISSN, issn)); | ||
} | ||
} | ||
|
||
return entry; | ||
} catch (JSONException e) { | ||
throw new ParseException("Error parsing EuropePMC response", e); | ||
} | ||
} | ||
|
||
@Override | ||
public void doPostCleanup(BibEntry entry) { | ||
new FieldFormatterCleanup(StandardField.PAGES, new NormalizePagesFormatter()).cleanup(entry); | ||
} | ||
|
||
@Override | ||
public String getName() { | ||
return "Europe/PMCID"; | ||
} | ||
} |
53 changes: 53 additions & 0 deletions
53
jablib/src/test/java/org/jabref/logic/importer/fetcher/EuropePmcFetcherTest.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package org.jabref.logic.importer.fetcher; | ||
|
||
import java.util.Optional; | ||
|
||
import org.jabref.logic.importer.FetcherException; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.StandardField; | ||
import org.jabref.model.entry.field.UnknownField; | ||
import org.jabref.model.entry.types.StandardEntryType; | ||
import org.jabref.testutils.category.FetcherTest; | ||
|
||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertTrue; | ||
|
||
/**
 * Test for {@link EuropePmcFetcher}: looks up identifier 27670948 and asserts that the fetched
 * entry (with its abstract removed) equals a hand-written expected entry.
 */
// NOTE(review): presumably this queries the live Europe PMC service - confirm against @FetcherTest
@FetcherTest | ||
class EuropePmcFetcherTest { | ||
|
||
private EuropePmcFetcher fetcher; | ||
private BibEntry entryWijedasa; | ||
|
||
// Creates a fresh fetcher and the expected entry for identifier 27670948 before each test.
@BeforeEach | ||
void setUp() { | ||
fetcher = new EuropePmcFetcher(); | ||
entryWijedasa = new BibEntry(StandardEntryType.Article) | ||
.withField(StandardField.AUTHOR, "Wijedasa, Lahiru S. and Jauhiainen, Jyrki and Könönen, Mari and Lampela, Maija and Vasander, Harri and Leblanc, Marie-Claire and Evers, Stephanie and Smith, Thomas E. L. and Yule, Catherine M. and Varkkey, Helena and Lupascu, Massimo and Parish, Faizal and Singleton, Ian and Clements, Gopalasamy R. and Aziz, Sheema Abdul and Harrison, Mark E. and Cheyne, Susan and Anshari, Gusti Z. and Meijaard, Erik and Goldstein, Jenny E. and Waldron, Susan and Hergoualc'h, Kristell and Dommain, Rene and Frolking, Steve and Evans, Christopher D. and Posa, Mary Rose C. and Glaser, Paul H. and Suryadiputra, Nyoman and Lubis, Reza and Santika, Truly and Padfield, Rory and Kurnianto, Sofyan and Hadisiswoyo, Panut and Lim, Teck Wyn and Page, Susan E. and Gauci, Vincent and Van Der Meer, Peter J. and Buckland, Helen and Garnier, Fabien and Samuel, Marshall K. and Choo, Liza Nuriati Lim Kim and O'Reilly, Patrick and Warren, Matthew and Suksuwan, Surin and Sumarga, Elham and Jain, Anuj and Laurance, William F. and Couwenberg, John and Joosten, Hans and Vernimmen, Ronald and Hooijer, Aljosja and Malins, Chris and Cochrane, Mark A. and Perumal, Balu and Siegert, Florian and Peh, Kelvin S.-H. and Comeau, Louis-Pierre and Verchot, Louis and Harvey, Charles F. and Cobb, Alex and Jaafar, Zeehan and Wösten, Henk and Manuri, Solichin and Müller, Moritz and Giesen, Wim and Phelps, Jacob and Yong, Ding Li and Silvius, Marcel and Wedeux, Béatrice M. M. and Hoyt, Alison and Osaki, Mitsuru and Hirano, Takashi and Takahashi, Hidenori and Kohyama, Takashi S. and Haraguchi, Akira and Nugroho, Nunung P. and Coomes, David A. and Quoi, Le Phat and Dohong, Alue and Gunawan, Haris and Gaveau, David L. A. and Langner, Andreas and Lim, Felix K. S. and Edwards, David P. and Giam, Xingli and Van Der Werf, Guido and Carmenta, Rachel and Verwer, Caspar C. and Gibson, Luke and Gandois, Laure and Graham, Laura Linda Bozena and Regalino, Jhanson and Wich, Serge A. 
and Rieley, Jack and Kettridge, Nicholas and Brown, Chloe and Pirard, Romain and Moore, Sam and Capilla, B. Ripoll and Ballhorn, Uwe and Ho, Hua Chew and Hoscilo, Agata and Lohberger, Sandra and Evans, Theodore A. and Yulianti, Nina and Blackham, Grace and Onrizal and Husson, Simon and Murdiyarso, Daniel and Pangala, Sunita and Cole, Lydia E. S. and Tacconi, Luca and Segah, Hendrik and Tonoto, Prayoto and Lee, Janice S. H. and Schmilewski, Gerald and Wulffraat, Stephan and Putra, Erianto Indra and Cattau, Megan E. and Clymo, R. S. and Morrison, Ross and Mujahid, Aazani and Miettinen, Jukka and Liew, Soo Chin and Valpola, Samu and Wilson, David and D'Arcy, Laura and Gerding, Michiel and Sundari, Siti and Thornton, Sara A. and Kalisz, Barbara and Chapman, Stephen J. and Su, Ahmad Suhaizi Mat and Basuki, Imam and Itoh, Masayuki and Traeholt, Carl and Sloan, Sean and Sayok, Alexander K. and Andersen, Roxane") | ||
.withField(StandardField.DOI, "10.1111/gcb.13516") | ||
.withField(StandardField.ISSN, "1354-1013") // there is also an essn | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
.withField(StandardField.ISSUE, "3") | ||
.withField(StandardField.JOURNAL, "Global change biology") | ||
.withField(StandardField.MONTH, "#mar#") | ||
.withField(StandardField.PAGES, "977--982") | ||
.withField(StandardField.PMID, "27670948") | ||
.withField(StandardField.HOWPUBLISHED, "Print-Electronic") | ||
.withField(new UnknownField("nlmid"), "9888746") | ||
.withField(StandardField.PUBSTATE, "ppublish") | ||
.withField(StandardField.TITLE, "Denial of long-term issues with agriculture on tropical peatlands will have devastating consequences.") | ||
.withField(StandardField.VOLUME, "23") | ||
.withField(StandardField.URL, "https://pubmed.ncbi.nlm.nih.gov/27670948/") | ||
.withField(StandardField.YEAR, "2017"); | ||
} | ||
|
||
// Fetches identifier 27670948, requires a result, and compares it with the expected entry
// after clearing the abstract, which the expected entry does not contain.
@Test | ||
void searchByIDWijedasa() throws FetcherException { | ||
Optional<BibEntry> fetchedEntry = fetcher.performSearchById("27670948"); | ||
assertTrue(fetchedEntry.isPresent()); | ||
|
||
fetchedEntry.get().clearField(StandardField.ABSTRACT); // Remove abstract due to copyright | ||
Siedlerchr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
assertEquals(Optional.of(entryWijedasa), fetchedEntry); | ||
} | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this also work with the new entry dialog?