From c307eb408f8eabf00bcf8de59408c8bf6c9bf271 Mon Sep 17 00:00:00 2001 From: Siedlerchr Date: Sat, 21 Jun 2025 16:54:57 +0200 Subject: [PATCH 1/5] First part of the implementation of a Europe PMC fetcher --- .../jabref/logic/importer/WebFetchers.java | 2 + .../importer/fetcher/EuropePmcFetcher.java | 149 ++++++++++++++++++ .../fetcher/EuropePmcFetcherTest.java | 54 +++++++ 3 files changed, 205 insertions(+) create mode 100644 jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java create mode 100644 jablib/src/test/java/org/jabref/logic/importer/fetcher/EuropePmcFetcherTest.java diff --git a/jablib/src/main/java/org/jabref/logic/importer/WebFetchers.java b/jablib/src/main/java/org/jabref/logic/importer/WebFetchers.java index 9cdefe042c1..3cb0f249115 100644 --- a/jablib/src/main/java/org/jabref/logic/importer/WebFetchers.java +++ b/jablib/src/main/java/org/jabref/logic/importer/WebFetchers.java @@ -26,6 +26,7 @@ import org.jabref.logic.importer.fetcher.DiVA; import org.jabref.logic.importer.fetcher.DoiFetcher; import org.jabref.logic.importer.fetcher.DoiResolution; +import org.jabref.logic.importer.fetcher.EuropePmcFetcher; import org.jabref.logic.importer.fetcher.GvkFetcher; import org.jabref.logic.importer.fetcher.IEEE; import org.jabref.logic.importer.fetcher.INSPIREFetcher; @@ -147,6 +148,7 @@ public static SortedSet getIdBasedFetchers(ImportFormatPreferenc // .addRetryFetcher(new DoiToBibtexConverterComIsbnFetcher(importFormatPreferences))); set.add(new DiVA(importFormatPreferences)); set.add(new DoiFetcher(importFormatPreferences)); + set.add(new EuropePmcFetcher()); set.add(new MedlineFetcher()); set.add(new TitleFetcher(importFormatPreferences)); set.add(new MathSciNet(importFormatPreferences)); diff --git a/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java b/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java new file mode 100644 index 00000000000..3071d4842e4 --- /dev/null +++ 
b/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java @@ -0,0 +1,149 @@ +package org.jabref.logic.importer.fetcher; + +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +import org.jabref.logic.importer.IdBasedParserFetcher; +import org.jabref.logic.importer.ParseException; +import org.jabref.logic.importer.Parser; +import org.jabref.logic.importer.util.JsonReader; +import org.jabref.model.entry.Author; +import org.jabref.model.entry.AuthorList; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.Month; +import org.jabref.model.entry.field.StandardField; +import org.jabref.model.entry.field.UnknownField; +import org.jabref.model.entry.types.EntryType; +import org.jabref.model.entry.types.StandardEntryType; + +import kong.unirest.core.json.JSONArray; +import kong.unirest.core.json.JSONException; +import kong.unirest.core.json.JSONObject; + +public class EuropePmcFetcher implements IdBasedParserFetcher { + + @Override + public URL getUrlForIdentifier(String identifier) throws URISyntaxException, MalformedURLException { + return URI.create("https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=" + identifier + "&resultType=core&format=json").toURL(); + } + + @Override + public Parser getParser() { + return inputStream -> { + JSONObject response = JsonReader.toJsonObject(inputStream); + if (response.isEmpty()) { + return List.of(); + } + return List.of(jsonItemToBibEntry(response)); + }; + } + + private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException { + try { + JSONObject result = item.getJSONObject("resultList").getJSONArray("result").getJSONObject(0); + + System.out.println(result.toString(2)); + + EntryType entryType = StandardEntryType.Article; + if (result.has("pubTypeList")) { + for (Object o : 
result.getJSONObject("pubTypeList").getJSONArray("pubType")) { + if ("letter".equalsIgnoreCase(o.toString())) { + entryType = StandardEntryType.Article; + break; + // TODO: handle other types e.g. books + } + } + } + + BibEntry entry = new BibEntry(entryType); + + entry.setField(StandardField.TITLE, result.optString("title")); + entry.setField(StandardField.ABSTRACT, result.optString("abstractText")); + + entry.setField(StandardField.YEAR, result.optString("pubYear")); + entry.setField(StandardField.VOLUME, result.optString("journalVolume")); + entry.setField(StandardField.ISSUE, result.optString("journalIssue")); + + // Format pages with double dash + String pages = result.optString("pageInfo"); + if (pages != null && !pages.isEmpty()) { + pages = pages.replace("-", "--"); + entry.setField(StandardField.PAGES, pages); + } + + entry.setField(StandardField.DOI, result.optString("doi")); + entry.setField(StandardField.PMID, result.optString("pmid")); + + // Handle URL + if (result.has("pmid")) { + entry.setField(StandardField.URL, "https://pubmed.ncbi.nlm.nih.gov/" + result.getString("pmid") + "/"); + } + + if (result.has("journalInfo") && result.getJSONObject("journalInfo").has("issn")) { + entry.setField(StandardField.ISSN, result.getJSONObject("journalInfo").getString("issn")); + } + + // Handle authors + if (result.has("authorList") && result.getJSONObject("authorList").has("author")) { + JSONArray authors = result.getJSONObject("authorList").getJSONArray("author"); + + List authorList = new ArrayList<>(); + + for (int i = 0; i < authors.length(); i++) { + JSONObject author = authors.getJSONObject(i); + + String lastName = author.optString("lastName", ""); + String firstName = author.optString("firstName", ""); + authorList.add(new Author(firstName, "", "", lastName, "")); + + entry.setField(StandardField.AUTHOR, AuthorList.of(authorList).getAsLastFirstNamesWithAnd(false)); + } + } + + if (result.has("pubTypeList") && 
result.getJSONObject("pubTypeList").has("pubType")) { + JSONArray pubTypes = result.getJSONObject("pubTypeList").getJSONArray("pubType"); + if (!pubTypes.isEmpty()) { + entry.setField(StandardField.PUBSTATE, pubTypes.getString(0)); + } + } + + if (result.has("pubModel")) { + Optional.ofNullable(result.optString("pubModel")).ifPresent(pubModel -> entry.setField(StandardField.HOWPUBLISHED, pubModel)); + } + if (result.has("publicationStatus")) { + Optional.ofNullable(result.optString("publicationStatus")).ifPresent(pubStatus -> entry.setField(StandardField.PUBSTATE, pubStatus)); + } + + if (result.has("journalInfo")) { + JSONObject journalInfo = result.getJSONObject("journalInfo"); + Optional.ofNullable(journalInfo.optString("issue")).ifPresent(issue -> entry.setField(StandardField.ISSUE, issue)); + Optional.ofNullable(journalInfo.optString("volume")).ifPresent(volumne -> entry.setField(StandardField.VOLUME, volumne)); + Optional.of(journalInfo.optInt("yearOfPublication")).ifPresent(year -> entry.setField(StandardField.YEAR, year.toString())); + Optional.of(journalInfo.optInt("monthOfPublication")) + .flatMap(month -> Month.parse(month.toString())) + .ifPresent(parsedMonth -> entry.setField(StandardField.MONTH, parsedMonth.getJabRefFormat())); + if (journalInfo.has("journal")) { + JSONObject journal = journalInfo.getJSONObject("journal"); + Optional.ofNullable(journal.optString("title")).ifPresent(title -> entry.setField(StandardField.JOURNAL, title)); + Optional.ofNullable(journal.optString("nlmid")).ifPresent(nlmid -> entry.setField(new UnknownField("nlmid"), nlmid)); + Optional.ofNullable(journal.optString("issn")).ifPresent(issn -> entry.setField(StandardField.ISSN, issn)); + } + } + + + return entry; + } catch (JSONException e) { + throw new ParseException("Error parsing EuropePMC response", e); + } + } + + @Override + public String getName() { + return "Europe/PMCID"; + } +} diff --git 
a/jablib/src/test/java/org/jabref/logic/importer/fetcher/EuropePmcFetcherTest.java b/jablib/src/test/java/org/jabref/logic/importer/fetcher/EuropePmcFetcherTest.java new file mode 100644 index 00000000000..d33d5adc766 --- /dev/null +++ b/jablib/src/test/java/org/jabref/logic/importer/fetcher/EuropePmcFetcherTest.java @@ -0,0 +1,54 @@ +package org.jabref.logic.importer.fetcher; + +import java.util.Optional; + +import org.jabref.logic.importer.FetcherException; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.field.StandardField; +import org.jabref.model.entry.field.UnknownField; +import org.jabref.model.entry.types.StandardEntryType; +import org.jabref.testutils.category.FetcherTest; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +@FetcherTest +class EuropePmcFetcherTest { + + private EuropePmcFetcher fetcher; + private BibEntry entryWijedasa; + + @BeforeEach + void setUp() { + fetcher = new EuropePmcFetcher(); + entryWijedasa = new BibEntry(StandardEntryType.Article) + .withField(StandardField.AUTHOR, "Wijedasa, Lahiru S. and Jauhiainen, Jyrki and Könönen, Mari and Lampela, Maija and Vasander, Harri and Leblanc, Marie-Claire and Evers, Stephanie and Smith, Thomas E. L. and Yule, Catherine M. and Varkkey, Helena and Lupascu, Massimo and Parish, Faizal and Singleton, Ian and Clements, Gopalasamy R. and Aziz, Sheema Abdul and Harrison, Mark E. and Cheyne, Susan and Anshari, Gusti Z. and Meijaard, Erik and Goldstein, Jenny E. and Waldron, Susan and Hergoualc'h, Kristell and Dommain, Rene and Frolking, Steve and Evans, Christopher D. and Posa, Mary Rose C. and Glaser, Paul H. and Suryadiputra, Nyoman and Lubis, Reza and Santika, Truly and Padfield, Rory and Kurnianto, Sofyan and Hadisiswoyo, Panut and Lim, Teck Wyn and Page, Susan E. and Gauci, Vincent and Van Der Meer, Peter J. 
and Buckland, Helen and Garnier, Fabien and Samuel, Marshall K. and Choo, Liza Nuriati Lim Kim and O'Reilly, Patrick and Warren, Matthew and Suksuwan, Surin and Sumarga, Elham and Jain, Anuj and Laurance, William F. and Couwenberg, John and Joosten, Hans and Vernimmen, Ronald and Hooijer, Aljosja and Malins, Chris and Cochrane, Mark A. and Perumal, Balu and Siegert, Florian and Peh, Kelvin S.-H. and Comeau, Louis-Pierre and Verchot, Louis and Harvey, Charles F. and Cobb, Alex and Jaafar, Zeehan and Wösten, Henk and Manuri, Solichin and Müller, Moritz and Giesen, Wim and Phelps, Jacob and Yong, Ding Li and Silvius, Marcel and Wedeux, Béatrice M. M. and Hoyt, Alison and Osaki, Mitsuru and Hirano, Takashi and Takahashi, Hidenori and Kohyama, Takashi S. and Haraguchi, Akira and Nugroho, Nunung P. and Coomes, David A. and Quoi, Le Phat and Dohong, Alue and Gunawan, Haris and Gaveau, David L. A. and Langner, Andreas and Lim, Felix K. S. and Edwards, David P. and Giam, Xingli and Van Der Werf, Guido and Carmenta, Rachel and Verwer, Caspar C. and Gibson, Luke and Gandois, Laure and Graham, Laura Linda Bozena and Regalino, Jhanson and Wich, Serge A. and Rieley, Jack and Kettridge, Nicholas and Brown, Chloe and Pirard, Romain and Moore, Sam and Capilla, B. Ripoll and Ballhorn, Uwe and Ho, Hua Chew and Hoscilo, Agata and Lohberger, Sandra and Evans, Theodore A. and Yulianti, Nina and Blackham, Grace and Onrizal and Husson, Simon and Murdiyarso, Daniel and Pangala, Sunita and Cole, Lydia E. S. and Tacconi, Luca and Segah, Hendrik and Tonoto, Prayoto and Lee, Janice S. H. and Schmilewski, Gerald and Wulffraat, Stephan and Putra, Erianto Indra and Cattau, Megan E. and Clymo, R. S. and Morrison, Ross and Mujahid, Aazani and Miettinen, Jukka and Liew, Soo Chin and Valpola, Samu and Wilson, David and D'Arcy, Laura and Gerding, Michiel and Sundari, Siti and Thornton, Sara A. and Kalisz, Barbara and Chapman, Stephen J. 
and Su, Ahmad Suhaizi Mat and Basuki, Imam and Itoh, Masayuki and Traeholt, Carl and Sloan, Sean and Sayok, Alexander K. and Andersen, Roxane") + .withField(StandardField.DOI, "10.1111/gcb.13516") + .withField(StandardField.ISSN, "1354-1013") // there is also an essn + .withField(StandardField.ISSUE, "3") + .withField(StandardField.JOURNAL, "Global change biology") + .withField(StandardField.MONTH, "#mar#") + .withField(StandardField.PAGES, "977--982") + .withField(StandardField.PMID, "27670948") + .withField(StandardField.HOWPUBLISHED, "Print-Electronic") + .withField(new UnknownField("nlmid"), "9888746") + .withField(StandardField.PUBSTATE, "ppublish") + .withField(StandardField.TITLE, "Denial of long-term issues with agriculture on tropical peatlands will have devastating consequences.") + .withField(StandardField.VOLUME, "23") + .withField(StandardField.URL, "https://pubmed.ncbi.nlm.nih.gov/27670948/") + .withField(StandardField.YEAR, "2017"); + + } + + @Test + void searchByIDWijedasa() throws FetcherException { + Optional fetchedEntry = fetcher.performSearchById("27670948"); + assertTrue(fetchedEntry.isPresent()); + + fetchedEntry.get().clearField(StandardField.ABSTRACT); // Remove abstract due to copyright + assertEquals(Optional.of(entryWijedasa), fetchedEntry); + } +} From 62d9a2d04d4806e5311556aea45e537bac6f827a Mon Sep 17 00:00:00 2001 From: Siedlerchr Date: Sat, 21 Jun 2025 17:16:26 +0200 Subject: [PATCH 2/5] fix checkstyle and var name --- .../org/jabref/logic/importer/fetcher/EuropePmcFetcher.java | 2 +- .../org/jabref/logic/importer/fetcher/EuropePmcFetcherTest.java | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java b/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java index 3071d4842e4..775d73e6330 100644 --- a/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java +++ 
b/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java @@ -122,7 +122,7 @@ private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException { if (result.has("journalInfo")) { JSONObject journalInfo = result.getJSONObject("journalInfo"); Optional.ofNullable(journalInfo.optString("issue")).ifPresent(issue -> entry.setField(StandardField.ISSUE, issue)); - Optional.ofNullable(journalInfo.optString("volume")).ifPresent(volumne -> entry.setField(StandardField.VOLUME, volumne)); + Optional.ofNullable(journalInfo.optString("volume")).ifPresent(volume -> entry.setField(StandardField.VOLUME, volume)); Optional.of(journalInfo.optInt("yearOfPublication")).ifPresent(year -> entry.setField(StandardField.YEAR, year.toString())); Optional.of(journalInfo.optInt("monthOfPublication")) .flatMap(month -> Month.parse(month.toString())) diff --git a/jablib/src/test/java/org/jabref/logic/importer/fetcher/EuropePmcFetcherTest.java b/jablib/src/test/java/org/jabref/logic/importer/fetcher/EuropePmcFetcherTest.java index d33d5adc766..10d9df77ee7 100644 --- a/jablib/src/test/java/org/jabref/logic/importer/fetcher/EuropePmcFetcherTest.java +++ b/jablib/src/test/java/org/jabref/logic/importer/fetcher/EuropePmcFetcherTest.java @@ -40,7 +40,6 @@ void setUp() { .withField(StandardField.VOLUME, "23") .withField(StandardField.URL, "https://pubmed.ncbi.nlm.nih.gov/27670948/") .withField(StandardField.YEAR, "2017"); - } @Test From 7dbe91eee4bf7f0e7a10064e0128e7312dc2728a Mon Sep 17 00:00:00 2001 From: Siedlerchr Date: Sat, 21 Jun 2025 17:48:31 +0200 Subject: [PATCH 3/5] fix checkstyle and var name --- .../java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java | 1 - 1 file changed, 1 deletion(-) diff --git a/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java b/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java index 775d73e6330..15139136566 100644 --- 
a/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java +++ b/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java @@ -135,7 +135,6 @@ private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException { } } - return entry; } catch (JSONException e) { throw new ParseException("Error parsing EuropePMC response", e); From 4b097a5ce2ed77e70b44b8572ae8e878c40b7504 Mon Sep 17 00:00:00 2001 From: Siedlerchr Date: Sun, 22 Jun 2025 15:26:38 +0200 Subject: [PATCH 4/5] add pages cleanup and add changelog entry --- CHANGELOG.md | 1 + .../logic/importer/fetcher/EuropePmcFetcher.java | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76ccf01097a..dc2c95ffe47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - We introduced a settings parameter to manage citations' relations local storage time-to-live with a default value set to 30 days. [#11189](https://github.com/JabRef/jabref/issues/11189) - We distribute arm64 images for Linux. [#10842](https://github.com/JabRef/jabref/issues/10842) - We added the field `monthfiled` to the default list of fields to resolve BibTeX-Strings for [#13375](https://github.com/JabRef/jabref/issues/13375) +- We added a new ID-based fetcher for [Europe PMC](https://europepmc.org/). 
[#13389](https://github.com/JabRef/jabref/pull/13389) ### Changed diff --git a/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java b/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java index 15139136566..e29b21b5052 100644 --- a/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java +++ b/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java @@ -8,6 +8,8 @@ import java.util.List; import java.util.Optional; +import org.jabref.logic.cleanup.FieldFormatterCleanup; +import org.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter; import org.jabref.logic.importer.IdBasedParserFetcher; import org.jabref.logic.importer.ParseException; import org.jabref.logic.importer.Parser; @@ -71,10 +73,7 @@ private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException { // Format pages with double dash String pages = result.optString("pageInfo"); - if (pages != null && !pages.isEmpty()) { - pages = pages.replace("-", "--"); - entry.setField(StandardField.PAGES, pages); - } + entry.setField(StandardField.PAGES, pages); entry.setField(StandardField.DOI, result.optString("doi")); entry.setField(StandardField.PMID, result.optString("pmid")); @@ -141,6 +140,11 @@ private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException { } } + @Override + public void doPostCleanup(BibEntry entry) { + new FieldFormatterCleanup(StandardField.PAGES, new NormalizePagesFormatter()).cleanup(entry); + } + @Override public String getName() { return "Europe/PMCID"; From 8df1f054823df76b390cc4be19ca492a00708b76 Mon Sep 17 00:00:00 2001 From: Siedlerchr Date: Sun, 22 Jun 2025 15:39:23 +0200 Subject: [PATCH 5/5] remove comment --- .../java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java | 1 - 1 file changed, 1 deletion(-) diff --git a/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java 
b/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java index e29b21b5052..e65d0835e6b 100644 --- a/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java +++ b/jablib/src/main/java/org/jabref/logic/importer/fetcher/EuropePmcFetcher.java @@ -71,7 +71,6 @@ private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException { entry.setField(StandardField.VOLUME, result.optString("journalVolume")); entry.setField(StandardField.ISSUE, result.optString("journalIssue")); - // Format pages with double dash String pages = result.optString("pageInfo"); entry.setField(StandardField.PAGES, pages);