Skip to content

Commit 049fed2

Browse files
authored
Convert INSPIRE fetcher to new infrastructure (#4146)
* Convert INSPIRE fetcher to new infrastructure * Remove obsolete language keys
1 parent fffb57b commit 049fed2

File tree

6 files changed

+138
-153
lines changed

6 files changed

+138
-153
lines changed

src/main/java/org/jabref/gui/importer/fetcher/EntryFetchers.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ public class EntryFetchers {
1515
public EntryFetchers(JournalAbbreviationLoader abbreviationLoader) {
1616
entryFetchers.add(new CiteSeerXFetcher());
1717
entryFetchers.add(new IEEEXploreFetcher(abbreviationLoader));
18-
entryFetchers.add(new INSPIREFetcher());
1918
// entryFetchers.add(new OAI2Fetcher()); - new arXiv fetcher in place, see below
2019
entryFetchers.add(new ACMPortalFetcher());
2120

src/main/java/org/jabref/gui/importer/fetcher/INSPIREFetcher.java

Lines changed: 0 additions & 150 deletions
This file was deleted.

src/main/java/org/jabref/logic/importer/WebFetchers.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.jabref.logic.importer.fetcher.GoogleScholar;
1818
import org.jabref.logic.importer.fetcher.GvkFetcher;
1919
import org.jabref.logic.importer.fetcher.IEEE;
20+
import org.jabref.logic.importer.fetcher.INSPIREFetcher;
2021
import org.jabref.logic.importer.fetcher.IacrEprintFetcher;
2122
import org.jabref.logic.importer.fetcher.IsbnFetcher;
2223
import org.jabref.logic.importer.fetcher.LibraryOfCongress;
@@ -76,6 +77,7 @@ public static Optional<IdFetcher<? extends Identifier>> getIdFetcherForField(Str
7677
public static List<SearchBasedFetcher> getSearchBasedFetchers(ImportFormatPreferences importFormatPreferences) {
7778
ArrayList<SearchBasedFetcher> list = new ArrayList<>();
7879
list.add(new ArXiv(importFormatPreferences));
80+
list.add(new INSPIREFetcher(importFormatPreferences));
7981
list.add(new GvkFetcher());
8082
list.add(new MedlineFetcher());
8183
list.add(new AstrophysicsDataSystem(importFormatPreferences));
src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
package org.jabref.logic.importer.fetcher;
2+
3+
import java.io.BufferedReader;
4+
import java.io.InputStreamReader;
5+
import java.net.MalformedURLException;
6+
import java.net.URISyntaxException;
7+
import java.net.URL;
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
import java.util.regex.Matcher;
11+
import java.util.regex.Pattern;
12+
import java.util.stream.Collectors;
13+
14+
import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
15+
import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter;
16+
import org.jabref.logic.help.HelpFile;
17+
import org.jabref.logic.importer.FetcherException;
18+
import org.jabref.logic.importer.ImportFormatPreferences;
19+
import org.jabref.logic.importer.Parser;
20+
import org.jabref.logic.importer.SearchBasedParserFetcher;
21+
import org.jabref.logic.importer.fileformat.BibtexParser;
22+
import org.jabref.logic.util.OS;
23+
import org.jabref.model.cleanup.FieldFormatterCleanup;
24+
import org.jabref.model.entry.BibEntry;
25+
import org.jabref.model.entry.FieldName;
26+
import org.jabref.model.util.DummyFileUpdateMonitor;
27+
28+
import org.apache.http.client.utils.URIBuilder;
29+
30+
/**
31+
* Fetches data from the INSPIRE database.
32+
*
33+
* @implNote We just use the normal search interface since it provides direct BibTeX export while the API (http://inspirehep.net/info/hep/api) currently only supports JSON and XML
34+
*/
35+
public class INSPIREFetcher implements SearchBasedParserFetcher {
36+
37+
private static final String INSPIRE_HOST = "https://inspirehep.net/search";
38+
39+
private final ImportFormatPreferences preferences;
40+
41+
public INSPIREFetcher(ImportFormatPreferences preferences) {
42+
this.preferences = preferences;
43+
}
44+
45+
@Override
46+
public String getName() {
47+
return "INSPIRE";
48+
}
49+
50+
@Override
51+
public HelpFile getHelpPage() {
52+
return HelpFile.FETCHER_INSPIRE;
53+
}
54+
55+
@Override
56+
public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
57+
URIBuilder uriBuilder = new URIBuilder(INSPIRE_HOST);
58+
uriBuilder.addParameter("p", query); // Query
59+
//uriBuilder.addParameter("jrec", "1"); // Start index (not needed at the moment)
60+
uriBuilder.addParameter("rg", "100"); // Should return up to 100 items (instead of default 25)
61+
uriBuilder.addParameter("of", "hx"); // BibTeX format
62+
return uriBuilder.build().toURL();
63+
}
64+
65+
@Override
66+
public Parser getParser() {
67+
// Inspire returns the BibTeX result embedded in HTML
68+
// So we extract the BibTeX string from the <pre>bibtex</pre> tags and pass the content to the BibTeX parser
69+
return inputStream -> {
70+
String response = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE));
71+
72+
List<BibEntry> entries = new ArrayList<>();
73+
BibtexParser bibtexParser = new BibtexParser(preferences, new DummyFileUpdateMonitor());
74+
Pattern pattern = Pattern.compile("<pre>(?s)(.*)</pre>");
75+
Matcher matcher = pattern.matcher(response);
76+
while (matcher.find()) {
77+
String bibtexEntryString = matcher.group(1);
78+
entries.addAll(bibtexParser.parseEntries(bibtexEntryString));
79+
}
80+
return entries;
81+
};
82+
}
83+
84+
@Override
85+
public void doPostCleanup(BibEntry entry) {
86+
// Remove strange "SLACcitation" field
87+
new FieldFormatterCleanup("SLACcitation", new ClearFormatter()).cleanup(entry);
88+
89+
// Remove braces around content of "title" field
90+
new FieldFormatterCleanup(FieldName.TITLE, new RemoveBracesFormatter()).cleanup(entry);
91+
}
92+
}

src/main/resources/l10n/JabRef_en.properties

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1750,10 +1750,8 @@ Error\ Occurred=Error Occurred
17501750
Journal\ file\ %s\ already\ added=Journal file %s already added
17511751
Name\ cannot\ be\ empty=Name cannot be empty
17521752

1753-
Adding\ fetched\ entries=Adding fetched entries
17541753
Display\ keywords\ appearing\ in\ ALL\ entries=Display keywords appearing in ALL entries
17551754
Display\ keywords\ appearing\ in\ ANY\ entry=Display keywords appearing in ANY entry
1756-
Fetching\ entries\ from\ Inspire=Fetching entries from Inspire
17571755
None\ of\ the\ selected\ entries\ have\ titles.=None of the selected entries have titles.
17581756
None\ of\ the\ selected\ entries\ have\ BibTeX\ keys.=None of the selected entries have BibTeX keys.
17591757
Unabbreviate\ journal\ names=Unabbreviate journal names
src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package org.jabref.logic.importer.fetcher;
2+
3+
import java.util.Collections;
4+
import java.util.List;
5+
6+
import org.jabref.logic.bibtex.FieldContentParserPreferences;
7+
import org.jabref.logic.importer.ImportFormatPreferences;
8+
import org.jabref.model.entry.BibEntry;
9+
import org.jabref.model.entry.BibtexEntryTypes;
10+
11+
import org.junit.jupiter.api.BeforeEach;
12+
import org.junit.jupiter.api.Test;
13+
14+
import static org.junit.jupiter.api.Assertions.assertEquals;
15+
import static org.mockito.Mockito.mock;
16+
import static org.mockito.Mockito.when;
17+
18+
class INSPIREFetcherTest {
19+
private INSPIREFetcher fetcher;
20+
21+
@BeforeEach
22+
void setUp() {
23+
ImportFormatPreferences importFormatPreferences = mock(ImportFormatPreferences.class);
24+
when(importFormatPreferences.getFieldContentParserPreferences()).thenReturn(mock(FieldContentParserPreferences.class));
25+
fetcher = new INSPIREFetcher(importFormatPreferences);
26+
}
27+
28+
@Test
29+
void searchByQueryFindsEntry() throws Exception {
30+
BibEntry expected = new BibEntry(BibtexEntryTypes.MASTERSTHESIS.getName());
31+
expected.setCiteKey("Diez:2014ppa");
32+
expected.setField("author", "Diez, Tobias");
33+
expected.setField("title", "Slice theorem for Fr\\'echet group actions and covariant symplectic field theory");
34+
expected.setField("school", "Leipzig U.");
35+
expected.setField("year", "2013");
36+
expected.setField("url", "https://inspirehep.net/record/1295621/files/arXiv:1405.2249.pdf");
37+
expected.setField("eprint", "1405.2249");
38+
expected.setField("archivePrefix", "arXiv");
39+
expected.setField("primaryClass", "math-ph");
40+
41+
List<BibEntry> fetchedEntries = fetcher.performSearch("Fr\\'echet group actions field");
42+
assertEquals(Collections.singletonList(expected), fetchedEntries);
43+
}
44+
}

0 commit comments

Comments
 (0)