From 7672b2de95aea95b92d47cde3215ffb9f96aadcb Mon Sep 17 00:00:00 2001 From: Jonatan Asketorp <2598631+k3KAW8Pnf7mkmdSMPHz27@users.noreply.github.com> Date: Sun, 24 Jan 2021 14:53:44 -0500 Subject: [PATCH 01/15] Fix expansion of bracketed expressions in RegExpBasedFileFinder (#7338) --- CHANGELOG.md | 1 + .../citationkeypattern/BracketedPattern.java | 2 +- .../logic/util/io/RegExpBasedFileFinder.java | 78 ++++----- .../BracketedPatternTest.java | 40 +++++ .../util/io/RegExpBasedFileFinderTests.java | 148 +++++++++--------- .../subdirectory/2003_Hippel_209.pdf | Bin .../2017_Gra\305\276ulis_726.pdf" | Bin .../subdirectory/pdfInSubdirectory.pdf | Bin 5 -> 0 bytes 8 files changed, 157 insertions(+), 112 deletions(-) delete mode 100644 src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2003_Hippel_209.pdf delete mode 100644 "src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2017_Gra\305\276ulis_726.pdf" delete mode 100644 src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/pdfInSubdirectory.pdf diff --git a/CHANGELOG.md b/CHANGELOG.md index 2316c209889..3196eb56b50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We fixed an issue where the "Find unlinked files" dialog would freeze JabRef on importing. [#7205](https://github.com/JabRef/jabref/issues/7205) - We fixed an issue where the "Find unlinked files" would stop importing when importing a single file failed. 
[#7206](https://github.com/JabRef/jabref/issues/7206) - We fixed an issue where an exception would be displayed for previewing and preferences when a custom theme has been configured but is missing [#7177](https://github.com/JabRef/jabref/issues/7177) +- We fixed an issue where the regex based file search miss-interpreted specific symbols [#4342](https://github.com/JabRef/jabref/issues/4342) - We fixed an issue where the Harvard RTF exporter used the wrong default file extension. [4508](https://github.com/JabRef/jabref/issues/4508) - We fixed an issue where the Harvard RTF exporter did not use the new authors formatter and therefore did not export "organization" authors correctly. [4508](https://github.com/JabRef/jabref/issues/4508) - We fixed an issue where the field `urldate` was not exported to the corresponding fields `YearAccessed`, `MonthAccessed`, `DayAccessed` in MS Office XML [#7354](https://github.com/JabRef/jabref/issues/7354) diff --git a/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java b/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java index 0e5a7d4a518..b4d4c203be1 100644 --- a/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java +++ b/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java @@ -187,7 +187,7 @@ public static String expandBrackets(String pattern, Character keywordDelimiter, * @param database The {@link BibDatabase} for field resolving. May be null. 
* @return a function accepting a bracketed expression and returning the result of expanding it */ - private static Function expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) { + public static Function expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) { return (String bracket) -> { String expandedPattern; List fieldParts = parseFieldAndModifiers(bracket); diff --git a/src/main/java/org/jabref/logic/util/io/RegExpBasedFileFinder.java b/src/main/java/org/jabref/logic/util/io/RegExpBasedFileFinder.java index cdb2d9a03f9..01bcbbf0ba8 100644 --- a/src/main/java/org/jabref/logic/util/io/RegExpBasedFileFinder.java +++ b/src/main/java/org/jabref/logic/util/io/RegExpBasedFileFinder.java @@ -8,9 +8,9 @@ import java.nio.file.Path; import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.function.BiPredicate; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -18,7 +18,6 @@ import java.util.stream.Stream; import org.jabref.logic.citationkeypattern.BracketedPattern; -import org.jabref.model.database.BibDatabase; import org.jabref.model.entry.BibEntry; import org.jabref.model.strings.StringUtil; @@ -28,7 +27,6 @@ class RegExpBasedFileFinder implements FileFinder { private static final Pattern ESCAPE_PATTERN = Pattern.compile("([^\\\\])\\\\([^\\\\])"); - private static final Pattern SQUARE_BRACKETS_PATTERN = Pattern.compile("\\[.*?\\]"); private final String regExp; private final Character keywordDelimiter; @@ -41,21 +39,41 @@ class RegExpBasedFileFinder implements FileFinder { } /** - * Takes a string that contains bracketed expression and expands each of these using getFieldAndFormat. - *

- * Unknown Bracket expressions are silently dropped. + * Creates a Pattern that matches the file name corresponding to the last element of {@code fileParts} with any bracketed patterns expanded. + * + * @throws IOException throws an IOException if a PatternSyntaxException occurs */ - public static String expandBrackets(String bracketString, BibEntry entry, BibDatabase database, - Character keywordDelimiter) { - Matcher matcher = SQUARE_BRACKETS_PATTERN.matcher(bracketString); - StringBuilder expandedStringBuffer = new StringBuilder(); - while (matcher.find()) { - String replacement = BracketedPattern.expandBrackets(matcher.group(), keywordDelimiter, entry, database); - matcher.appendReplacement(expandedStringBuffer, replacement); + private Pattern createFileNamePattern(String[] fileParts, String extensionRegExp, BibEntry entry) throws IOException { + // Protect the extension marker so that it isn't treated as a bracketed pattern + String filePart = fileParts[fileParts.length - 1].replace("[extension]", EXT_MARKER); + + // We need to supply a custom function to deal with the content of a bracketed expression and expandBracketContent is the default function + Function expandBracket = BracketedPattern.expandBracketContent(keywordDelimiter, entry, null); + // but, we want to post-process the expanded content so that it can be used as a regex for finding a file name + Function bracketToFileNameRegex = expandBracket.andThen(RegExpBasedFileFinder::toFileNameRegex); + + String expandedBracketAsFileNameRegex = BracketedPattern.expandBrackets(filePart, bracketToFileNameRegex); + + String fileNamePattern = expandedBracketAsFileNameRegex + .replaceAll(EXT_MARKER, extensionRegExp) // Replace the extension marker + .replaceAll("\\\\\\\\", "\\\\"); + try { + return Pattern.compile('^' + fileNamePattern + '$', Pattern.CASE_INSENSITIVE); + } catch (PatternSyntaxException e) { + throw new IOException(String.format("There is a syntax error in the regular expression %s used to search 
for files", fileNamePattern), e); } - matcher.appendTail(expandedStringBuffer); + } - return expandedStringBuffer.toString(); + /** + * Helper method for both exact matching (if the file name were not created by JabRef) and cleaned file name matching. + * + * @param expandedContent the expanded content of a bracketed expression + * @return a String representation of a regex matching the expanded content and the expanded content cleaned for file name use + */ + private static String toFileNameRegex(String expandedContent) { + var cleanedContent = FileNameCleaner.cleanFileName(expandedContent); + return expandedContent.equals(cleanedContent) ? Pattern.quote(expandedContent) : + "(" + Pattern.quote(expandedContent) + ")|(" + Pattern.quote(cleanedContent) + ")"; } /** @@ -142,9 +160,7 @@ private List findFile(final BibEntry entry, final Path directory, final St } for (int index = 0; index < (fileParts.length - 1); index++) { - String dirToProcess = fileParts[index]; - dirToProcess = expandBrackets(dirToProcess, entry, null, keywordDelimiter); if (dirToProcess.matches("^.:$")) { // Windows Drive Letter actualDirectory = Path.of(dirToProcess + '/'); @@ -179,33 +195,21 @@ private List findFile(final BibEntry entry, final Path directory, final St resultFiles.addAll(findFile(entry, path, restOfFileString, extensionRegExp)); } } catch (UncheckedIOException ioe) { - throw new IOException(ioe); + throw ioe.getCause(); } } // End process directory information } // Last step: check if the given file can be found in this directory - String filePart = fileParts[fileParts.length - 1].replace("[extension]", EXT_MARKER); - String filenameToLookFor = expandBrackets(filePart, entry, null, keywordDelimiter).replaceAll(EXT_MARKER, extensionRegExp); - - try { - final Pattern toMatch = Pattern.compile('^' + filenameToLookFor.replaceAll("\\\\\\\\", "\\\\") + '$', - Pattern.CASE_INSENSITIVE); - BiPredicate matcher = (path, attributes) -> 
toMatch.matcher(path.getFileName().toString()).matches(); - resultFiles.addAll(collectFilesWithMatcher(actualDirectory, matcher)); - } catch (UncheckedIOException | PatternSyntaxException e) { - throw new IOException("Could not look for " + filenameToLookFor, e); - } - - return resultFiles; - } - - private List collectFilesWithMatcher(Path actualDirectory, BiPredicate matcher) { + Pattern toMatch = createFileNamePattern(fileParts, extensionRegExp, entry); + BiPredicate matcher = (path, attributes) -> toMatch.matcher(path.getFileName().toString()).matches(); try (Stream pathStream = Files.find(actualDirectory, 1, matcher, FileVisitOption.FOLLOW_LINKS)) { - return pathStream.collect(Collectors.toList()); - } catch (UncheckedIOException | IOException ioe) { - return Collections.emptyList(); + resultFiles.addAll(pathStream.collect(Collectors.toList())); + } catch (UncheckedIOException uncheckedIOException) { + // Previously, an empty list were returned here on both IOException and UncheckedIOException + throw uncheckedIOException.getCause(); } + return resultFiles; } private boolean isSubDirectory(Path rootDirectory, Path path) { diff --git a/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java b/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java index bbc0e6eb3ea..16b0ebe59f8 100644 --- a/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java +++ b/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java @@ -324,4 +324,44 @@ void expandBracketsLastNameWithChineseCharacters() { assertEquals("杨秀群", BracketedPattern.expandBrackets("[auth]", null, bibEntry, null)); } + + @Test + void expandBracketsWithTestCasesFromRegExpBasedFileFinder() { + BibEntry entry = new BibEntry(StandardEntryType.Article).withCitationKey("HipKro03"); + entry.setField(StandardField.AUTHOR, "Eric von Hippel and Georg von Krogh"); + entry.setField(StandardField.TITLE, "Open Source Software and the 
\"Private-Collective\" Innovation Model: Issues for Organization Science"); + entry.setField(StandardField.JOURNAL, "Organization Science"); + entry.setField(StandardField.YEAR, "2003"); + entry.setField(StandardField.VOLUME, "14"); + entry.setField(StandardField.PAGES, "209--223"); + entry.setField(StandardField.NUMBER, "2"); + entry.setField(StandardField.ADDRESS, "Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA"); + entry.setField(StandardField.DOI, "http://dx.doi.org/10.1287/orsc.14.2.209.14992"); + entry.setField(StandardField.ISSN, "1526-5455"); + entry.setField(StandardField.PUBLISHER, "INFORMS"); + + BibDatabase database = new BibDatabase(); + database.insertEntry(entry); + + assertEquals("", BracketedPattern.expandBrackets("", ',', entry, database)); + + assertEquals("dropped", BracketedPattern.expandBrackets("drop[unknownkey]ped", ',', entry, database)); + + assertEquals("Eric von Hippel and Georg von Krogh", + BracketedPattern.expandBrackets("[author]", ',', entry, database)); + + assertEquals("Eric von Hippel and Georg von Krogh are two famous authors.", + BracketedPattern.expandBrackets("[author] are two famous authors.", ',', entry, database)); + + assertEquals("Eric von Hippel and Georg von Krogh are two famous authors.", + BracketedPattern.expandBrackets("[author] are two famous authors.", ',', entry, database)); + + assertEquals( + "Eric von Hippel and Georg von Krogh have published Open Source Software and the \"Private-Collective\" Innovation Model: Issues for Organization Science in Organization Science.", + BracketedPattern.expandBrackets("[author] have published [fulltitle] in [journal].", ',', entry, database)); + + assertEquals( + "Eric von Hippel and Georg von Krogh have published Open Source Software and the \"Private Collective\" Innovation Model: Issues for Organization Science in Organization Science.", + BracketedPattern.expandBrackets("[author] have published [title] in [journal].", 
',', entry, database)); + } } diff --git a/src/test/java/org/jabref/logic/util/io/RegExpBasedFileFinderTests.java b/src/test/java/org/jabref/logic/util/io/RegExpBasedFileFinderTests.java index 0cc53fdbc37..0c776e52fdd 100644 --- a/src/test/java/org/jabref/logic/util/io/RegExpBasedFileFinderTests.java +++ b/src/test/java/org/jabref/logic/util/io/RegExpBasedFileFinderTests.java @@ -5,26 +5,33 @@ import java.util.Collections; import java.util.List; -import org.jabref.model.database.BibDatabase; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.types.StandardEntryType; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; class RegExpBasedFileFinderTests { - - private static final String FILES_DIRECTORY = "src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder"; - private BibDatabase database; + private static final List PDF_EXTENSION = Collections.singletonList("pdf"); + private static final List FILE_NAMES = List.of( + "ACM_IEEE-CS.pdf", + "pdfInDatabase.pdf", + "Regexp from [A-Z].pdf", + "directory/subdirectory/2003_Hippel_209.pdf", + "directory/subdirectory/2017_Gražulis_726.pdf", + "directory/subdirectory/pdfInSubdirectory.pdf", + "directory/subdirectory/GUO ea - INORG CHEM COMMUN 2010 - Ferroelectric Metal Organic Framework (MOF).pdf" + ); + private Path directory; private BibEntry entry; @BeforeEach - void setUp() { - + void setUp(@TempDir Path tempDir) throws Exception { entry = new BibEntry(); entry.setType(StandardEntryType.Article); entry.setCitationKey("HipKro03"); @@ -40,69 +47,98 @@ void setUp() { entry.setField(StandardField.ISSN, "1526-5455"); entry.setField(StandardField.PUBLISHER, "INFORMS"); - database = new BibDatabase(); - database.insertEntry(entry); + // Create default directories 
and files + directory = tempDir; + Files.createDirectories(directory.resolve("directory/subdirectory")); + for (String fileName : FILE_NAMES) { + Files.createFile(directory.resolve(fileName)); + } } @Test void testFindFiles() throws Exception { // given - BibEntry localEntry = new BibEntry(StandardEntryType.Article); - localEntry.setCitationKey("pdfInDatabase"); - localEntry.setField(StandardField.YEAR, "2001"); - - List extensions = Collections.singletonList("pdf"); + BibEntry localEntry = new BibEntry(StandardEntryType.Article).withCitationKey("pdfInDatabase"); - List dirs = Collections.singletonList(Path.of(FILES_DIRECTORY)); RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("**/[citationkey].*\\\\.[extension]", ','); // when - List result = fileFinder.findAssociatedFiles(localEntry, dirs, extensions); + List result = fileFinder.findAssociatedFiles(localEntry, List.of(directory), PDF_EXTENSION); + List expected = List.of(directory.resolve("pdfInDatabase.pdf")); // then - assertEquals(Collections.singletonList(Path.of("src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/pdfInDatabase.pdf")), - result); + assertEquals(expected, result); } @Test void testYearAuthFirstPageFindFiles() throws Exception { // given - List extensions = Collections.singletonList("pdf"); - - List dirs = Collections.singletonList(Path.of(FILES_DIRECTORY)); RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("**/[year]_[auth]_[firstpage].*\\\\.[extension]", ','); // when - List result = fileFinder.findAssociatedFiles(entry, dirs, extensions); + List result = fileFinder.findAssociatedFiles(entry, List.of(directory), PDF_EXTENSION); + List expected = List.of(directory.resolve("directory/subdirectory/2003_Hippel_209.pdf")); // then - assertEquals(Collections.singletonList(Path.of("src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2003_Hippel_209.pdf")), - result); + assertEquals(expected, result); + } + + @Test + 
void findAssociatedFilesFindFileContainingBracketsFromBracketedExpression() throws Exception { + var bibEntry = new BibEntry().withField(StandardField.TITLE, "Regexp from [A-Z]"); + + RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("[TITLE]\\\\.[extension]", ','); + + List result = fileFinder.findAssociatedFiles(bibEntry, List.of(directory), PDF_EXTENSION); + List pdfFile = List.of(directory.resolve("Regexp from [A-Z].pdf")); + + assertEquals(pdfFile, result); + } + + @Test + void findAssociatedFilesFindCleanedFileFromBracketedExpression() throws Exception { + var bibEntry = new BibEntry().withField(StandardField.JOURNAL, "ACM/IEEE-CS"); + + RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("[JOURNAL]\\\\.[extension]", ','); + + List result = fileFinder.findAssociatedFiles(bibEntry, List.of(directory), PDF_EXTENSION); + List pdfFile = List.of(directory.resolve("ACM_IEEE-CS.pdf")); + + assertEquals(pdfFile, result); + } + + @Test + void findAssociatedFilesFindFileContainingParenthesizesFromBracketedExpression() throws Exception { + var bibEntry = new BibEntry().withCitationKey("Guo_ICC_2010") + .withField(StandardField.TITLE, "Ferroelectric Metal Organic Framework (MOF)") + .withField(StandardField.AUTHOR, "Guo, M. and Cai, H.-L. 
and Xiong, R.-G.") + .withField(StandardField.JOURNAL, "Inorganic Chemistry Communications") + .withField(StandardField.YEAR, "2010"); + + RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("**/.*[TITLE].*\\\\.[extension]", ','); + + List result = fileFinder.findAssociatedFiles(bibEntry, List.of(directory), PDF_EXTENSION); + List pdfFile = List.of(directory.resolve("directory/subdirectory/GUO ea - INORG CHEM COMMUN 2010 - Ferroelectric Metal Organic Framework (MOF).pdf")); + + assertEquals(pdfFile, result); } @Test void testAuthorWithDiacritics() throws Exception { // given - BibEntry localEntry = new BibEntry(StandardEntryType.Article); - localEntry.setCitationKey("Grazulis2017"); + BibEntry localEntry = new BibEntry(StandardEntryType.Article).withCitationKey("Grazulis2017"); localEntry.setField(StandardField.YEAR, "2017"); localEntry.setField(StandardField.AUTHOR, "Gražulis, Saulius and O. Kitsune"); localEntry.setField(StandardField.PAGES, "726--729"); - List extensions = Collections.singletonList("pdf"); - - List dirs = Collections.singletonList(Path.of(FILES_DIRECTORY)); RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("**/[year]_[auth]_[firstpage]\\\\.[extension]", ','); // when - List result = fileFinder.findAssociatedFiles(localEntry, dirs, extensions); - List expected = Collections.singletonList(Path.of("src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2017_Gražulis_726.pdf")); + List result = fileFinder.findAssociatedFiles(localEntry, List.of(directory), PDF_EXTENSION); + List expected = List.of(directory.resolve("directory/subdirectory/2017_Gražulis_726.pdf")); // then - assertEquals(expected.size(), result.size()); - for (int i = 0; i < expected.size(); i++) { - assertTrue(Files.isSameFile(expected.get(i), result.get(i))); - } + assertEquals(expected, result); } @Test @@ -112,17 +148,14 @@ void testFindFileInSubdirectory() throws Exception { localEntry.setCitationKey("pdfInSubdirectory"); 
localEntry.setField(StandardField.YEAR, "2017"); - List extensions = Collections.singletonList("pdf"); - - List dirs = Collections.singletonList(Path.of(FILES_DIRECTORY)); RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("**/[citationkey].*\\\\.[extension]", ','); // when - List result = fileFinder.findAssociatedFiles(localEntry, dirs, extensions); + List result = fileFinder.findAssociatedFiles(localEntry, List.of(directory), PDF_EXTENSION); + List expected = List.of(directory.resolve("directory/subdirectory/pdfInSubdirectory.pdf")); // then - assertEquals(Collections.singletonList(Path.of("src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/pdfInSubdirectory.pdf")), - result); + assertEquals(expected, result); } @Test @@ -132,45 +165,12 @@ void testFindFileNonRecursive() throws Exception { localEntry.setCitationKey("pdfInSubdirectory"); localEntry.setField(StandardField.YEAR, "2017"); - List extensions = Collections.singletonList("pdf"); - - List dirs = Collections.singletonList(Path.of(FILES_DIRECTORY)); RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("*/[citationkey].*\\\\.[extension]", ','); // when - List result = fileFinder.findAssociatedFiles(localEntry, dirs, extensions); + List result = fileFinder.findAssociatedFiles(localEntry, List.of(directory), PDF_EXTENSION); // then assertTrue(result.isEmpty()); } - - @Test - void testExpandBrackets() { - - assertEquals("", RegExpBasedFileFinder.expandBrackets("", entry, database, ',')); - - assertEquals("dropped", RegExpBasedFileFinder.expandBrackets("drop[unknownkey]ped", entry, database, - ',')); - - assertEquals("Eric von Hippel and Georg von Krogh", - RegExpBasedFileFinder.expandBrackets("[author]", entry, database, ',')); - - assertEquals("Eric von Hippel and Georg von Krogh are two famous authors.", - RegExpBasedFileFinder.expandBrackets("[author] are two famous authors.", entry, database, - ',')); - - assertEquals("Eric von Hippel and Georg von 
Krogh are two famous authors.", - RegExpBasedFileFinder.expandBrackets("[author] are two famous authors.", entry, database, - ',')); - - assertEquals( - "Eric von Hippel and Georg von Krogh have published Open Source Software and the \"Private-Collective\" Innovation Model: Issues for Organization Science in Organization Science.", - RegExpBasedFileFinder.expandBrackets("[author] have published [fulltitle] in [journal].", entry, database, - ',')); - - assertEquals( - "Eric von Hippel and Georg von Krogh have published Open Source Software and the \"Private Collective\" Innovation Model: Issues for Organization Science in Organization Science.", - RegExpBasedFileFinder.expandBrackets("[author] have published [title] in [journal].", entry, database, - ',')); - } } diff --git a/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2003_Hippel_209.pdf b/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2003_Hippel_209.pdf deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git "a/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2017_Gra\305\276ulis_726.pdf" "b/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2017_Gra\305\276ulis_726.pdf" deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/pdfInSubdirectory.pdf b/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/pdfInSubdirectory.pdf deleted file mode 100644 index 3ac0b7d0dd9994c7ec1c3211b7f4d39211d84753..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5 McmXp_sw_zb00m9~>Hq)$ From ae43548c16ca35b9a83a2d34864557cf503c0df4 Mon Sep 17 00:00:00 2001 
From: Oliver Kopp Date: Mon, 25 Jan 2021 00:26:28 +0100 Subject: [PATCH 02/15] Fix handling of URL in file field (#7347) * Add workaround of test not working on Windows Co-authored-by: Dominik Voigt * Change input of throws Co-authored-by: Dominik Voigt * Add link as String as constructor parameter * Add CHANGELOG entry * Move tests for FieldFieldParser to FileFieldParserTest (and fix handling of //) * Fix checkstyle Co-authored-by: Dominik Voigt Co-authored-by: Carl Christian Snethlage <50491877+calixtus@users.noreply.github.com> --- CHANGELOG.md | 3 +- .../logic/importer/util/FileFieldParser.java | 53 +++++-- .../org/jabref/model/entry/LinkedFile.java | 17 +- .../logic/bibtex/FileFieldWriterTest.java | 129 +-------------- .../importer/util/FileFieldParserTest.java | 148 ++++++++++++++++++ 5 files changed, 205 insertions(+), 145 deletions(-) create mode 100644 src/test/java/org/jabref/logic/importer/util/FileFieldParserTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 3196eb56b50..826575581e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,7 +34,8 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We fixed an issue where the "Find unlinked files" dialog would freeze JabRef on importing. [#7205](https://github.com/JabRef/jabref/issues/7205) - We fixed an issue where the "Find unlinked files" would stop importing when importing a single file failed. [#7206](https://github.com/JabRef/jabref/issues/7206) - We fixed an issue where an exception would be displayed for previewing and preferences when a custom theme has been configured but is missing [#7177](https://github.com/JabRef/jabref/issues/7177) -- We fixed an issue where the regex based file search miss-interpreted specific symbols [#4342](https://github.com/JabRef/jabref/issues/4342) +- We fixed an issue where URLs in `file` fields could not be handled on Windows. 
[#7359](https://github.com/JabRef/jabref/issues/7359) +- We fixed an issue where the regex based file search miss-interpreted specific symbols. [#4342](https://github.com/JabRef/jabref/issues/4342) - We fixed an issue where the Harvard RTF exporter used the wrong default file extension. [4508](https://github.com/JabRef/jabref/issues/4508) - We fixed an issue where the Harvard RTF exporter did not use the new authors formatter and therefore did not export "organization" authors correctly. [4508](https://github.com/JabRef/jabref/issues/4508) - We fixed an issue where the field `urldate` was not exported to the corresponding fields `YearAccessed`, `MonthAccessed`, `DayAccessed` in MS Office XML [#7354](https://github.com/JabRef/jabref/issues/7354) diff --git a/src/main/java/org/jabref/logic/importer/util/FileFieldParser.java b/src/main/java/org/jabref/logic/importer/util/FileFieldParser.java index 9c51c7468fc..02632488eed 100644 --- a/src/main/java/org/jabref/logic/importer/util/FileFieldParser.java +++ b/src/main/java/org/jabref/logic/importer/util/FileFieldParser.java @@ -2,6 +2,7 @@ import java.net.MalformedURLException; import java.net.URL; +import java.nio.file.InvalidPathException; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; @@ -17,7 +18,7 @@ public static List parse(String value) { return files; } - List entry = new ArrayList<>(); + List linkedFileData = new ArrayList<>(); StringBuilder sb = new StringBuilder(); boolean inXmlChar = false; boolean escaped = false; @@ -39,30 +40,38 @@ public static List parse(String value) { sb.append(c); inXmlChar = false; } else if (!escaped && (c == ':')) { - entry.add(sb.toString()); + // We are in the next LinkedFile data element + linkedFileData.add(sb.toString()); sb = new StringBuilder(); } else if (!escaped && (c == ';') && !inXmlChar) { - entry.add(sb.toString()); - sb = new StringBuilder(); + linkedFileData.add(sb.toString()); + files.add(convert(linkedFileData)); - 
files.add(convert(entry)); + // next iteration + sb = new StringBuilder(); } else { sb.append(c); } escaped = false; } if (sb.length() > 0) { - entry.add(sb.toString()); + linkedFileData.add(sb.toString()); } - - if (!entry.isEmpty()) { - files.add(convert(entry)); + if (!linkedFileData.isEmpty()) { + files.add(convert(linkedFileData)); } - return files; } - private static LinkedFile convert(List entry) { + /** + * Converts the given textual representation of a LinkedFile object + * + * SIDE EFFECT: The given entry list is cleared upon completion + * + * @param entry the list of elements in the linked file textual representation + * @return a LinkedFile object + */ + static LinkedFile convert(List entry) { // ensure list has at least 3 fields while (entry.size() < 3) { entry.add(""); @@ -71,17 +80,31 @@ private static LinkedFile convert(List entry) { LinkedFile field = null; if (LinkedFile.isOnlineLink(entry.get(1))) { try { - field = new LinkedFile(new URL(entry.get(1)), entry.get(2)); + field = new LinkedFile(entry.get(0), new URL(entry.get(1)), entry.get(2)); } catch (MalformedURLException ignored) { - // ignored + // in case the URL is malformed, store it nevertheless + field = new LinkedFile(entry.get(0), entry.get(1), entry.get(2)); } } if (field == null) { - field = new LinkedFile(entry.get(0), Path.of(entry.get(1)), entry.get(2)); + String pathStr = entry.get(1); + if (pathStr.contains("//")) { + // In case the path contains //, we assume it is a malformed URL, not a malformed path. + // On linux, the double slash would be converted to a single slash. 
+ field = new LinkedFile(entry.get(0), pathStr, entry.get(2)); + } else { + try { + // there is no Path.isValidPath(String) method + Path path = Path.of(pathStr); + field = new LinkedFile(entry.get(0), path, entry.get(2)); + } catch (InvalidPathException e) { + field = new LinkedFile(entry.get(0), pathStr, entry.get(2)); + } + } } - // link is only mandatory field + // link is the only mandatory field if (field.getDescription().isEmpty() && field.getLink().isEmpty() && !field.getFileType().isEmpty()) { field = new LinkedFile("", Path.of(field.getFileType()), ""); } else if (!field.getDescription().isEmpty() && field.getLink().isEmpty() && field.getFileType().isEmpty()) { diff --git a/src/main/java/org/jabref/model/entry/LinkedFile.java b/src/main/java/org/jabref/model/entry/LinkedFile.java index ba98a8ba14e..3e893d79b1a 100644 --- a/src/main/java/org/jabref/model/entry/LinkedFile.java +++ b/src/main/java/org/jabref/model/entry/LinkedFile.java @@ -33,15 +33,24 @@ public class LinkedFile implements Serializable { private transient StringProperty fileType = new SimpleStringProperty(); public LinkedFile(String description, Path link, String fileType) { + this(Objects.requireNonNull(description), Objects.requireNonNull(link).toString(), Objects.requireNonNull(fileType)); + } + + /** + * Constructor for non-valid paths. We need to parse them, because the GUI needs to render it. 
+ */ + public LinkedFile(String description, String link, String fileType) { this.description.setValue(Objects.requireNonNull(description)); - setLink(Objects.requireNonNull(link).toString()); + setLink(link); this.fileType.setValue(Objects.requireNonNull(fileType)); } public LinkedFile(URL link, String fileType) { - this.description.setValue(""); - setLink(Objects.requireNonNull(link).toString()); - this.fileType.setValue(Objects.requireNonNull(fileType)); + this("", Objects.requireNonNull(link).toString(), Objects.requireNonNull(fileType)); + } + + public LinkedFile(String description, URL link, String fileType) { + this(description, Objects.requireNonNull(link).toString(), Objects.requireNonNull(fileType)); } public StringProperty descriptionProperty() { diff --git a/src/test/java/org/jabref/logic/bibtex/FileFieldWriterTest.java b/src/test/java/org/jabref/logic/bibtex/FileFieldWriterTest.java index 7305ebbcfb3..a9c722a3815 100644 --- a/src/test/java/org/jabref/logic/bibtex/FileFieldWriterTest.java +++ b/src/test/java/org/jabref/logic/bibtex/FileFieldWriterTest.java @@ -1,13 +1,7 @@ package org.jabref.logic.bibtex; -import java.net.MalformedURLException; -import java.net.URL; import java.nio.file.Path; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.jabref.logic.importer.util.FileFieldParser; import org.jabref.model.entry.LinkedFile; import org.junit.jupiter.api.Test; @@ -17,121 +11,6 @@ public class FileFieldWriterTest { - @Test - public void emptyListForEmptyInput() { - String emptyInput = ""; - - assertEquals(Collections.emptyList(), FileFieldParser.parse(emptyInput)); - assertEquals(Collections.emptyList(), FileFieldParser.parse(null)); - } - - @Test - public void parseCorrectInput() { - String input = "Desc:File.PDF:PDF"; - - assertEquals( - Collections.singletonList(new LinkedFile("Desc", Path.of("File.PDF"), "PDF")), - FileFieldParser.parse(input)); - } - - @Test - public void parseCorrectOnlineInput() throws 
MalformedURLException { - String input = ":http\\://arxiv.org/pdf/2010.08497v1:PDF"; - String inputURL = "http://arxiv.org/pdf/2010.08497v1"; - List expected = Collections.singletonList(new LinkedFile(new URL(inputURL), "PDF")); - - assertEquals(expected, FileFieldParser.parse(input)); - } - - @Test - public void parseFaultyOnlineInput() { - String input = ":htt\\://arxiv.org/pdf/2010.08497v1:PDF"; - String inputURL = "htt://arxiv.org/pdf/2010.08497v1"; - List expected = Collections.singletonList(new LinkedFile("", Path.of(inputURL), "PDF")); - - assertEquals(expected, FileFieldParser.parse(input)); - } - - @Test - public void ingoreMissingDescription() { - String input = ":wei2005ahp.pdf:PDF"; - - assertEquals( - Collections.singletonList(new LinkedFile("", Path.of("wei2005ahp.pdf"), "PDF")), - FileFieldParser.parse(input)); - } - - @Test - public void interpreteLinkAsOnlyMandatoryField() { - String single = "wei2005ahp.pdf"; - String multiple = "wei2005ahp.pdf;other.pdf"; - - assertEquals( - Collections.singletonList(new LinkedFile("", Path.of("wei2005ahp.pdf"), "")), - FileFieldParser.parse(single)); - - assertEquals( - Arrays.asList( - new LinkedFile("", Path.of("wei2005ahp.pdf"), ""), - new LinkedFile("", Path.of("other.pdf"), "")), - FileFieldParser.parse(multiple)); - } - - @Test - public void escapedCharactersInDescription() { - String input = "test\\:\\;:wei2005ahp.pdf:PDF"; - - assertEquals( - Collections.singletonList(new LinkedFile("test:;", Path.of("wei2005ahp.pdf"), "PDF")), - FileFieldParser.parse(input)); - } - - @Test - public void handleXmlCharacters() { - String input = "test,\\;st\\:\\;:wei2005ahp.pdf:PDF"; - - assertEquals( - Collections.singletonList(new LinkedFile("test,st:;", Path.of("wei2005ahp.pdf"), "PDF")), - FileFieldParser.parse(input)); - } - - @Test - public void handleEscapedFilePath() { - String input = "desc:C\\:\\\\test.pdf:PDF"; - - assertEquals( - Collections.singletonList(new LinkedFile("desc", Path.of("C:\\test.pdf"), 
"PDF")), - FileFieldParser.parse(input)); - } - - @Test - public void subsetOfFieldsResultsInFileLink() { - String descOnly = "file.pdf::"; - String fileOnly = ":file.pdf"; - String typeOnly = "::file.pdf"; - - assertEquals( - Collections.singletonList(new LinkedFile("", Path.of("file.pdf"), "")), - FileFieldParser.parse(descOnly)); - - assertEquals( - Collections.singletonList(new LinkedFile("", Path.of("file.pdf"), "")), - FileFieldParser.parse(fileOnly)); - - assertEquals( - Collections.singletonList(new LinkedFile("", Path.of("file.pdf"), "")), - FileFieldParser.parse(typeOnly)); - } - - @Test - public void tooManySeparators() { - String input = "desc:file.pdf:PDF:asdf"; - - assertEquals( - Collections.singletonList(new LinkedFile("desc", Path.of("file.pdf"), "PDF")), - FileFieldParser.parse(input)); - } - @Test public void testQuoteStandard() { assertEquals("a", FileFieldWriter.quote("a")); @@ -154,10 +33,10 @@ public void testQuoteNull() { @Test public void testEncodeStringArray() { - assertEquals("a:b;c:d", FileFieldWriter.encodeStringArray(new String[][]{{"a", "b"}, {"c", "d"}})); - assertEquals("a:;c:d", FileFieldWriter.encodeStringArray(new String[][]{{"a", ""}, {"c", "d"}})); - assertEquals("a:" + null + ";c:d", FileFieldWriter.encodeStringArray(new String[][]{{"a", null}, {"c", "d"}})); - assertEquals("a:\\:b;c\\;:d", FileFieldWriter.encodeStringArray(new String[][]{{"a", ":b"}, {"c;", "d"}})); + assertEquals("a:b;c:d", FileFieldWriter.encodeStringArray(new String[][] {{"a", "b"}, {"c", "d"}})); + assertEquals("a:;c:d", FileFieldWriter.encodeStringArray(new String[][] {{"a", ""}, {"c", "d"}})); + assertEquals("a:" + null + ";c:d", FileFieldWriter.encodeStringArray(new String[][] {{"a", null}, {"c", "d"}})); + assertEquals("a:\\:b;c\\;:d", FileFieldWriter.encodeStringArray(new String[][] {{"a", ":b"}, {"c;", "d"}})); } @Test diff --git a/src/test/java/org/jabref/logic/importer/util/FileFieldParserTest.java 
b/src/test/java/org/jabref/logic/importer/util/FileFieldParserTest.java new file mode 100644 index 00000000000..11c74196d5c --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/util/FileFieldParserTest.java @@ -0,0 +1,148 @@ +package org.jabref.logic.importer.util; + +import java.net.URL; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Stream; + +import org.jabref.model.entry.LinkedFile; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class FileFieldParserTest { + + private static Stream testData() { + return Stream.of( + Arguments.of( + new LinkedFile("arXiv Fulltext PDF", "https://arxiv.org/pdf/1109.0517.pdf", "application/pdf"), + List.of("arXiv Fulltext PDF", "https://arxiv.org/pdf/1109.0517.pdf", "application/pdf") + ), + Arguments.of( + new LinkedFile("arXiv Fulltext PDF", "https/://arxiv.org/pdf/1109.0517.pdf", "application/pdf"), + List.of("arXiv Fulltext PDF", "https\\://arxiv.org/pdf/1109.0517.pdf", "application/pdf") + ) + ); + } + + @ParameterizedTest + @MethodSource("testData") + public void check(LinkedFile expected, List input) { + // we need to convert the unmodifiable list to a modifiable because of the side effect of "convert" + assertEquals(expected, FileFieldParser.convert(new ArrayList<>(input))); + } + + private static Stream stringsToParseTestData() throws Exception { + return Stream.of( + // null string + Arguments.of( + Collections.emptyList(), + null + ), + + // empty string + Arguments.of( + Collections.emptyList(), + "" + ), + + // correct input + Arguments.of( + Collections.singletonList(new LinkedFile("Desc", Path.of("File.PDF"), "PDF")), + "Desc:File.PDF:PDF" + ), + + // parseCorrectOnlineInput + Arguments.of( + Collections.singletonList(new LinkedFile(new 
URL("http://arxiv.org/pdf/2010.08497v1"), "PDF")), + ":http\\://arxiv.org/pdf/2010.08497v1:PDF" + ), + + // parseFaultyOnlineInput + Arguments.of( + Collections.singletonList(new LinkedFile("", "htt://arxiv.org/pdf/2010.08497v1", "PDF")), + ":htt\\://arxiv.org/pdf/2010.08497v1:PDF" + ), + + // parseFaultyArxivOnlineInput + Arguments.of( + Collections.singletonList(new LinkedFile("arXiv Fulltext PDF", "https://arxiv.org/pdf/1109.0517.pdf", "application/pdf")), + "arXiv Fulltext PDF:https\\://arxiv.org/pdf/1109.0517.pdf:application/pdf" + ), + + // ignoreMissingDescription + Arguments.of( + Collections.singletonList(new LinkedFile("", Path.of("wei2005ahp.pdf"), "PDF")), + ":wei2005ahp.pdf:PDF" + ), + + // interpretLinkAsOnlyMandatoryField: single + Arguments.of( + Collections.singletonList(new LinkedFile("", Path.of("wei2005ahp.pdf"), "")), + "wei2005ahp.pdf" + ), + + // interpretLinkAsOnlyMandatoryField: multiple + Arguments.of( + List.of( + new LinkedFile("", Path.of("wei2005ahp.pdf"), ""), + new LinkedFile("", Path.of("other.pdf"), "") + ), + "wei2005ahp.pdf;other.pdf" + ), + + // escapedCharactersInDescription + Arguments.of( + Collections.singletonList(new LinkedFile("test:;", Path.of("wei2005ahp.pdf"), "PDF")), + "test\\:\\;:wei2005ahp.pdf:PDF" + ), + + // handleXmlCharacters + Arguments.of( + Collections.singletonList(new LinkedFile("test,st:;", Path.of("wei2005ahp.pdf"), "PDF")), + "test,\\;st\\:\\;:wei2005ahp.pdf:PDF" + ), + + // handleEscapedFilePath + Arguments.of( + Collections.singletonList(new LinkedFile("desc", Path.of("C:\\test.pdf"), "PDF")), + "desc:C\\:\\\\test.pdf:PDF" + ), + + // subsetOfFieldsResultsInFileLink: description only + Arguments.of( + Collections.singletonList(new LinkedFile("", Path.of("file.pdf"), "")), + "file.pdf::" + ), + + // subsetOfFieldsResultsInFileLink: file only + Arguments.of( + Collections.singletonList(new LinkedFile("", Path.of("file.pdf"), "")), + ":file.pdf" + ), + + // subsetOfFieldsResultsInFileLink: type only + 
Arguments.of( + Collections.singletonList(new LinkedFile("", Path.of("file.pdf"), "")), + "::file.pdf" + ), + + // tooManySeparators + Arguments.of( + Collections.singletonList(new LinkedFile("desc", Path.of("file.pdf"), "PDF")), + "desc:file.pdf:PDF:asdf" + ) + ); + } + + @ParameterizedTest + @MethodSource("stringsToParseTestData") + public void testParse(List expected, String input) { + assertEquals(expected, FileFieldParser.parse(input)); + } +} From 7117b6197aa6f69f6b8b25f9452e875c9051046b Mon Sep 17 00:00:00 2001 From: Dominik Voigt Date: Tue, 26 Jan 2021 22:40:44 +0100 Subject: [PATCH 03/15] Change format for study definition to yaml (#7126) --- build.gradle | 4 + .../0018-use-Jackson-to-parse-study-yml.md | 56 ++++++ docs/adr/0019-keep-study-as-a-dto.md | 30 +++ src/main/java/module-info.java | 3 + .../org/jabref/logic/crawler/Crawler.java | 6 +- ...a => StudyDatabaseToFetcherConverter.java} | 20 +- .../jabref/logic/crawler/StudyRepository.java | 107 +++++------ .../jabref/logic/crawler/StudyYamlParser.java | 39 ++++ .../java/org/jabref/model/study/Study.java | 173 +++++++++++------- .../org/jabref/model/study/StudyDatabase.java | 67 +++++++ .../model/study/StudyMetaDataField.java | 24 --- .../org/jabref/model/study/StudyQuery.java | 50 +++++ .../org/jabref/logic/crawler/CrawlerTest.java | 29 ++- ... 
StudyDatabaseToFetcherConverterTest.java} | 11 +- .../logic/crawler/StudyRepositoryTest.java | 91 +++------ .../logic/crawler/StudyYamlParserTest.java | 55 ++++++ .../org/jabref/model/study/StudyTest.java | 94 ---------- .../org/jabref/logic/crawler/study.bib | 37 ---- .../org/jabref/logic/crawler/study.yml | 16 ++ 19 files changed, 533 insertions(+), 379 deletions(-) create mode 100644 docs/adr/0018-use-Jackson-to-parse-study-yml.md create mode 100644 docs/adr/0019-keep-study-as-a-dto.md rename src/main/java/org/jabref/logic/crawler/{LibraryEntryToFetcherConverter.java => StudyDatabaseToFetcherConverter.java} (73%) create mode 100644 src/main/java/org/jabref/logic/crawler/StudyYamlParser.java create mode 100644 src/main/java/org/jabref/model/study/StudyDatabase.java delete mode 100644 src/main/java/org/jabref/model/study/StudyMetaDataField.java create mode 100644 src/main/java/org/jabref/model/study/StudyQuery.java rename src/test/java/org/jabref/logic/crawler/{LibraryEntryToFetcherConverterTest.java => StudyDatabaseToFetcherConverterTest.java} (86%) create mode 100644 src/test/java/org/jabref/logic/crawler/StudyYamlParserTest.java delete mode 100644 src/test/java/org/jabref/model/study/StudyTest.java delete mode 100644 src/test/resources/org/jabref/logic/crawler/study.bib create mode 100644 src/test/resources/org/jabref/logic/crawler/study.yml diff --git a/build.gradle b/build.gradle index cd75b24eef4..2273d0a127a 100644 --- a/build.gradle +++ b/build.gradle @@ -42,6 +42,7 @@ java { application { mainClassName = "org.jabref.gui.JabRefLauncher" + mainModule = 'org.jabref' } // TODO: Ugly workaround to temporarily ignore build errors to dependencies of latex2unicode @@ -132,6 +133,9 @@ dependencies { implementation group: 'org.eclipse.jgit', name: 'org.eclipse.jgit', version: '5.10.0.202012080955-r' + implementation group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-yaml', version: '2.12.0-rc2' + implementation group: 
'com.fasterxml.jackson.datatype', name: 'jackson-datatype-jsr310', version: '2.12.0-rc2' + implementation group: 'org.mariadb.jdbc', name: 'mariadb-java-client', version: '2.7.1' implementation 'org.postgresql:postgresql:42.2.18' diff --git a/docs/adr/0018-use-Jackson-to-parse-study-yml.md b/docs/adr/0018-use-Jackson-to-parse-study-yml.md new file mode 100644 index 00000000000..70903544d58 --- /dev/null +++ b/docs/adr/0018-use-Jackson-to-parse-study-yml.md @@ -0,0 +1,56 @@ +# Use Jackson to parse study.yml + +## Context and Problem Statement + +The study definition file is formulated as a YAML document. +To access the definition within JabRef this document has to be parsed. +What parser should be used to parse YAML files? + +## Considered Options + +* [Jackson](https://github.com/FasterXML/jackson-dataformat-yaml) +* [SnakeYAML Engine](https://bitbucket.org/asomov/snakeyaml) +* [yamlbeans](https://github.com/EsotericSoftware/yamlbeans) +* [eo-yaml](https://github.com/decorators-squad/eo-yaml) +* Self-written parser + +## Decision Outcome + +Chosen option: Jackson, because it is a dedicated library for parsing YAML. yamlbeans also seems to be viable.
They all offer similar functionality + +## Pros and Cons of the Options + +### Jackson + +* Good, because established YAML parser library +* Good, because supports YAML 1.2 +* Good, because it can parse LocalDate + +### SnakeYAML Engine + +* Good, because established YAML parser library +* Good, because supports YAML 1.2 +* Bad, because cannot parse YAML into Java DTOs, only into [basic Java structures](https://bitbucket.org/asomov/snakeyaml-engine/src/master/), this then has to be assembled into DTOs + +### yamlbeans + +* Good, because established YAML parser library +* Good, because [nice getting started page](https://github.com/EsotericSoftware/yamlbeans) +* Bad, because objects need to be annotated in the yaml file to be parsed into Java objects + +### eo-yaml + +* Good, because established YAML parser library +* Good, because supports YAML 1.2 +* Bad, because cannot parse YAML into Java DTOs + +### Own parser + +* Good, because easily customizable +* Bad, because high effort +* Bad, because has to be tested extensively + +## Links + +* [Winery's ADR-0009](https://github.com/eclipse/winery/blob/master/docs/adr/0009-manual-tosca-yaml-serialisation.md) +* [Winery's ADR-0010](https://github.com/eclipse/winery/blob/master/docs/adr/0010-tosca-yaml-deserialisation-using-snakeyaml.md) diff --git a/docs/adr/0019-keep-study-as-a-dto.md b/docs/adr/0019-keep-study-as-a-dto.md new file mode 100644 index 00000000000..bc99d879e31 --- /dev/null +++ b/docs/adr/0019-keep-study-as-a-dto.md @@ -0,0 +1,30 @@ +# Keep study as a DTO + +## Context and Problem Statement + +The study holds query and library entries that could be replaced respectively with complex query and fetcher instances. +This poses the question: should the study remain a pure DTO object or should it contain direct object instances? 
+ +## Considered Options + +* Keep study as DTO and use transformers +* Replace entries with instances + +## Decision Outcome + +Chosen option: "Keep study as DTO and use transformers", because it comes out best (see below). + +## Pros and Cons of the Options + +### Keep study as DTO and use transformers + +* Good, because no need for custom serialization +* Good, because deactivated fetchers can be documented (important for traceable Searching (SLRs)) +* Bad, because Entries for databases and queries needed + +### Replace entries with instances + +* Good, because no need for database and query entries +* Bad, because custom de-/serializers for fetchers and complex queries needed +* Bad, because harder to maintain than using "vanilla" jackson de-/serialization +* … diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java index 0bee3c9dfd2..3619fe63b0c 100644 --- a/src/main/java/module-info.java +++ b/src/main/java/module-info.java @@ -88,4 +88,7 @@ requires lucene.queryparser; requires lucene.core; requires org.eclipse.jgit; + requires com.fasterxml.jackson.databind; + requires com.fasterxml.jackson.dataformat.yaml; + requires com.fasterxml.jackson.datatype.jsr310; } diff --git a/src/main/java/org/jabref/logic/crawler/Crawler.java b/src/main/java/org/jabref/logic/crawler/Crawler.java index 50feab3cdab..898af4ffbdb 100644 --- a/src/main/java/org/jabref/logic/crawler/Crawler.java +++ b/src/main/java/org/jabref/logic/crawler/Crawler.java @@ -11,7 +11,6 @@ import org.jabref.logic.preferences.TimestampPreferences; import org.jabref.model.entry.BibEntryTypesManager; import org.jabref.model.study.QueryResult; -import org.jabref.model.study.Study; import org.jabref.model.util.FileUpdateMonitor; import org.eclipse.jgit.api.errors.GitAPIException; @@ -36,9 +35,8 @@ public class Crawler { public Crawler(Path studyDefinitionFile, GitHandler gitHandler, FileUpdateMonitor fileUpdateMonitor, ImportFormatPreferences importFormatPreferences, SavePreferences
savePreferences, TimestampPreferences timestampPreferences, BibEntryTypesManager bibEntryTypesManager) throws IllegalArgumentException, IOException, ParseException, GitAPIException { Path studyRepositoryRoot = studyDefinitionFile.getParent(); studyRepository = new StudyRepository(studyRepositoryRoot, gitHandler, importFormatPreferences, fileUpdateMonitor, savePreferences, timestampPreferences, bibEntryTypesManager); - Study study = studyRepository.getStudy(); - LibraryEntryToFetcherConverter libraryEntryToFetcherConverter = new LibraryEntryToFetcherConverter(study.getActiveLibraryEntries(), importFormatPreferences); - this.studyFetcher = new StudyFetcher(libraryEntryToFetcherConverter.getActiveFetchers(), study.getSearchQueryStrings()); + StudyDatabaseToFetcherConverter studyDatabaseToFetcherConverter = new StudyDatabaseToFetcherConverter(studyRepository.getActiveLibraryEntries(), importFormatPreferences); + this.studyFetcher = new StudyFetcher(studyDatabaseToFetcherConverter.getActiveFetchers(), studyRepository.getSearchQueryStrings()); } /** diff --git a/src/main/java/org/jabref/logic/crawler/LibraryEntryToFetcherConverter.java b/src/main/java/org/jabref/logic/crawler/StudyDatabaseToFetcherConverter.java similarity index 73% rename from src/main/java/org/jabref/logic/crawler/LibraryEntryToFetcherConverter.java rename to src/main/java/org/jabref/logic/crawler/StudyDatabaseToFetcherConverter.java index cadf5b2978e..a5b51854cd8 100644 --- a/src/main/java/org/jabref/logic/crawler/LibraryEntryToFetcherConverter.java +++ b/src/main/java/org/jabref/logic/crawler/StudyDatabaseToFetcherConverter.java @@ -8,19 +8,16 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.SearchBasedFetcher; import org.jabref.logic.importer.WebFetchers; -import org.jabref.model.entry.BibEntry; -import org.jabref.model.entry.field.UnknownField; - -import static org.jabref.model.entry.types.SystematicLiteratureReviewStudyEntryType.LIBRARY_ENTRY; +import 
org.jabref.model.study.StudyDatabase; /** * Converts library entries from the given study into their corresponding fetchers. */ -class LibraryEntryToFetcherConverter { - private final List libraryEntries; +class StudyDatabaseToFetcherConverter { + private final List libraryEntries; private final ImportFormatPreferences importFormatPreferences; - public LibraryEntryToFetcherConverter(List libraryEntries, ImportFormatPreferences importFormatPreferences) { + public StudyDatabaseToFetcherConverter(List libraryEntries, ImportFormatPreferences importFormatPreferences) { this.libraryEntries = libraryEntries; this.importFormatPreferences = importFormatPreferences; } @@ -42,9 +39,8 @@ public List getActiveFetchers() { * @param libraryEntries List of entries * @return List of fetcher instances */ - private List getFetchersFromLibraryEntries(List libraryEntries) { + private List getFetchersFromLibraryEntries(List libraryEntries) { return libraryEntries.parallelStream() - .filter(bibEntry -> bibEntry.getType().getName().equals(LIBRARY_ENTRY.getName())) .map(this::createFetcherFromLibraryEntry) .filter(Objects::nonNull) .collect(Collectors.toList()); @@ -53,12 +49,12 @@ private List getFetchersFromLibraryEntries(List li /** * Transforms a library entry into a SearchBasedFetcher instance. This only works if the library entry specifies a supported fetcher. * - * @param libraryEntry the entry that will be converted + * @param studyDatabase the entry that will be converted * @return An instance of the fetcher defined by the library entry. 
*/ - private SearchBasedFetcher createFetcherFromLibraryEntry(BibEntry libraryEntry) { + private SearchBasedFetcher createFetcherFromLibraryEntry(StudyDatabase studyDatabase) { Set searchBasedFetchers = WebFetchers.getSearchBasedFetchers(importFormatPreferences); - String libraryNameFromFetcher = libraryEntry.getField(new UnknownField("name")).orElse(""); + String libraryNameFromFetcher = studyDatabase.getName(); return searchBasedFetchers.stream() .filter(searchBasedFetcher -> searchBasedFetcher.getName().toLowerCase().equals(libraryNameFromFetcher.toLowerCase())) .findAny() diff --git a/src/main/java/org/jabref/logic/crawler/StudyRepository.java b/src/main/java/org/jabref/logic/crawler/StudyRepository.java index 9971ab9dfb1..99aa896cfd5 100644 --- a/src/main/java/org/jabref/logic/crawler/StudyRepository.java +++ b/src/main/java/org/jabref/logic/crawler/StudyRepository.java @@ -2,14 +2,11 @@ import java.io.FileWriter; import java.io.IOException; -import java.io.InputStream; import java.io.Writer; import java.nio.file.Files; import java.nio.file.Path; import java.time.LocalDate; -import java.util.ArrayList; import java.util.List; -import java.util.Optional; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -22,17 +19,15 @@ import org.jabref.logic.importer.OpenDatabase; import org.jabref.logic.importer.ParseException; import org.jabref.logic.importer.SearchBasedFetcher; -import org.jabref.logic.importer.fileformat.BibtexParser; import org.jabref.logic.preferences.TimestampPreferences; import org.jabref.model.database.BibDatabase; import org.jabref.model.database.BibDatabaseContext; -import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.BibEntryTypesManager; -import org.jabref.model.entry.field.UnknownField; -import org.jabref.model.entry.types.SystematicLiteratureReviewStudyEntryType; import org.jabref.model.study.FetchResult; import org.jabref.model.study.QueryResult; import org.jabref.model.study.Study; +import 
org.jabref.model.study.StudyDatabase; +import org.jabref.model.study.StudyQuery; import org.jabref.model.util.FileUpdateMonitor; import org.eclipse.jgit.api.errors.GitAPIException; @@ -48,13 +43,13 @@ */ class StudyRepository { // Tests work with study.bib - private static final String STUDY_DEFINITION_FILE_NAME = "study.bib"; + private static final String STUDY_DEFINITION_FILE_NAME = "study.yml"; private static final Logger LOGGER = LoggerFactory.getLogger(StudyRepository.class); private static final Pattern MATCHCOLON = Pattern.compile(":"); private static final Pattern MATCHILLEGALCHARACTERS = Pattern.compile("[^A-Za-z0-9_.\\s=-]"); private final Path repositoryPath; - private final Path studyDefinitionBib; + private final Path studyDefinitionFile; private final GitHandler gitHandler; private final Study study; private final ImportFormatPreferences importFormatPreferences; @@ -90,14 +85,14 @@ public StudyRepository(Path pathToRepository, } this.importFormatPreferences = importFormatPreferences; this.fileUpdateMonitor = fileUpdateMonitor; - this.studyDefinitionBib = Path.of(repositoryPath.toString(), STUDY_DEFINITION_FILE_NAME); + this.studyDefinitionFile = Path.of(repositoryPath.toString(), STUDY_DEFINITION_FILE_NAME); this.savePreferences = savePreferences; this.timestampPreferences = timestampPreferences; this.bibEntryTypesManager = bibEntryTypesManager; if (Files.notExists(repositoryPath)) { throw new IOException("The given repository does not exists."); - } else if (Files.notExists(studyDefinitionBib)) { + } else if (Files.notExists(studyDefinitionFile)) { throw new IOException("The study definition file does not exist in the given repository."); } study = parseStudyFile(); @@ -126,30 +121,39 @@ public BibDatabaseContext getStudyResultEntries() throws IOException { } /** - * The study definition file contains all the definitions of a study. This method extracts the BibEntries from the study BiB file. 
+ * The study definition file contains all the definitions of a study. This method extracts this study from the yaml study definition file * * @return Returns the BibEntries parsed from the study definition file. * @throws IOException Problem opening the input stream. * @throws ParseException Problem parsing the study definition file. */ - private Study parseStudyFile() throws IOException, ParseException { - BibtexParser parser = new BibtexParser(importFormatPreferences, fileUpdateMonitor); - List parsedEntries = new ArrayList<>(); - try (InputStream inputStream = Files.newInputStream(studyDefinitionBib)) { - parsedEntries.addAll(parser.parseEntries(inputStream)); - } + private Study parseStudyFile() throws IOException { + return new StudyYamlParser().parseStudyYamlFile(studyDefinitionFile); + } + + /** + * Returns all query strings of the study definition + * + * @return List of all queries as Strings. + */ + public List getSearchQueryStrings() { + return study.getQueries() + .parallelStream() + .map(StudyQuery::getQuery) + .collect(Collectors.toList()); + } - BibEntry studyEntry = parsedEntries.parallelStream() - .filter(bibEntry -> bibEntry.getType().equals(SystematicLiteratureReviewStudyEntryType.STUDY_ENTRY)).findAny() - .orElseThrow(() -> new ParseException("Study definition file does not contain a study entry")); - List queryEntries = parsedEntries.parallelStream() - .filter(bibEntry -> bibEntry.getType().equals(SystematicLiteratureReviewStudyEntryType.SEARCH_QUERY_ENTRY)) - .collect(Collectors.toList()); - List libraryEntries = parsedEntries.parallelStream() - .filter(bibEntry -> bibEntry.getType().equals(SystematicLiteratureReviewStudyEntryType.LIBRARY_ENTRY)) - .collect(Collectors.toList()); - - return new Study(studyEntry, queryEntries, libraryEntries); + /** + * Extracts all active fetchers from the library entries. 
+ * + * @return List of BibEntries of type Library + * @throws IllegalArgumentException If a transformation from Library entry to LibraryDefinition fails + */ + public List getActiveLibraryEntries() throws IllegalArgumentException { + return study.getDatabases() + .parallelStream() + .filter(StudyDatabase::isEnabled) + .collect(Collectors.toList()); } public Study getStudy() { @@ -173,7 +177,7 @@ public void persist(List crawlResults) throws IOException, GitAPIEx } private void persistStudy() throws IOException { - writeResultToFile(studyDefinitionBib, new BibDatabase(study.getAllEntries())); + new StudyYamlParser().writeStudyYamlFile(study, studyDefinitionFile); } /** @@ -181,8 +185,8 @@ private void persistStudy() throws IOException { */ private void setUpRepositoryStructure() throws IOException { // Cannot use stream here since IOException has to be thrown - LibraryEntryToFetcherConverter converter = new LibraryEntryToFetcherConverter(study.getActiveLibraryEntries(), importFormatPreferences); - for (String query : study.getSearchQueryStrings()) { + StudyDatabaseToFetcherConverter converter = new StudyDatabaseToFetcherConverter(this.getActiveLibraryEntries(), importFormatPreferences); + for (String query : this.getSearchQueryStrings()) { createQueryResultFolder(query); converter.getActiveFetchers() .forEach(searchBasedFetcher -> createFetcherResultFile(query, searchBasedFetcher)); @@ -239,14 +243,14 @@ private void createBibFile(Path file) { * Structure: ID-trimmed query * * Examples: - * Input: '(title: test-title AND abstract: Test)' as a query entry with id 1 - * Output: '1 - title= test-title AND abstract= Test' + * Input: '(title: test-title AND abstract: Test)' as a query entry with id 12345678 + * Output: '12345678 - title= test-title AND abstract= Test' * - * Input: 'abstract: Test*' as a query entry with id 1 - * Output: '1 - abstract= Test' + * Input: 'abstract: Test*' as a query entry with id 87654321 + * Output: '87654321 - abstract= Test' * - * 
Input: '"test driven"' as a query entry with id 1 - * Output: '1 - test driven' + * Input: '"test driven"' as a query entry with id 12348765 + * Output: '12348765 - test driven' * * @param query that is trimmed and combined with its query id * @return a unique folder name for any query. @@ -255,31 +259,20 @@ private String trimNameAndAddID(String query) { // Replace all field: with field= for folder name String trimmedNamed = MATCHCOLON.matcher(query).replaceAll("="); trimmedNamed = MATCHILLEGALCHARACTERS.matcher(trimmedNamed).replaceAll(""); - if (query.length() > 240) { - trimmedNamed = query.substring(0, 240); + String id = computeIDForQuery(query); + // Whole path has to be shorter than 260 + int remainingPathLength = 220 - studyDefinitionFile.toString().length() - id.length(); + if (query.length() > remainingPathLength) { + trimmedNamed = query.substring(0, remainingPathLength); } - String id = findQueryIDByQueryString(query); return id + " - " + trimmedNamed; } /** - * Helper to find the query id for folder name creation. - * Returns the id of the first SearchQuery BibEntry with a query field that matches the given query. - * - * @param query The query whose ID is searched - * @return ID of the query defined in the study definition. + * Helper to compute the query id for folder name creation. 
*/ - private String findQueryIDByQueryString(String query) { - String queryField = "query"; - return study.getSearchQueryEntries() - .parallelStream() - .filter(bibEntry -> bibEntry.getField(new UnknownField(queryField)).orElse("").equals(query)) - .map(BibEntry::getCitationKey) - .filter(Optional::isPresent) - .map(Optional::get) - .findFirst() - .orElseThrow() - .replaceFirst(queryField, ""); + private String computeIDForQuery(String query) { + return String.valueOf(query.hashCode()); } /** diff --git a/src/main/java/org/jabref/logic/crawler/StudyYamlParser.java b/src/main/java/org/jabref/logic/crawler/StudyYamlParser.java new file mode 100644 index 00000000000..df2138434d2 --- /dev/null +++ b/src/main/java/org/jabref/logic/crawler/StudyYamlParser.java @@ -0,0 +1,39 @@ +package org.jabref.logic.crawler; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Path; + +import org.jabref.model.study.Study; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; + +public class StudyYamlParser { + + /** + * Parses the given yaml study definition file into a study instance + */ + public Study parseStudyYamlFile(Path studyYamlFile) throws IOException { + ObjectMapper yamlMapper = new ObjectMapper(new YAMLFactory()); + yamlMapper.registerModule(new JavaTimeModule()); + try (InputStream fileInputStream = new FileInputStream(studyYamlFile.toFile())) { + return yamlMapper.readValue(fileInputStream, Study.class); + } + } + + /** + * Writes the given study instance into a yaml file to the given path + */ + public void writeStudyYamlFile(Study study, Path studyYamlFile) throws IOException { + ObjectMapper yamlMapper = new ObjectMapper(new 
YAMLFactory().disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER) + .enable(YAMLGenerator.Feature.MINIMIZE_QUOTES)); + yamlMapper.registerModule(new JavaTimeModule()); + yamlMapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); + yamlMapper.writeValue(studyYamlFile.toFile(), study); + } +} diff --git a/src/main/java/org/jabref/model/study/Study.java b/src/main/java/org/jabref/model/study/Study.java index 37ed6e2328a..382268ce39a 100644 --- a/src/main/java/org/jabref/model/study/Study.java +++ b/src/main/java/org/jabref/model/study/Study.java @@ -1,98 +1,135 @@ package org.jabref.model.study; import java.time.LocalDate; -import java.util.ArrayList; import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; +import java.util.Objects; -import org.jabref.model.entry.BibEntry; -import org.jabref.model.entry.field.UnknownField; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonPropertyOrder; /** * This class represents a scientific study. * * This class defines all aspects of a scientific study relevant to the application. It is a proxy for the file based study definition. 
*/ + +@JsonPropertyOrder({"authors", "title", "last-search-date", "research-questions", "queries", "databases"}) public class Study { - private static final String SEARCH_QUERY_FIELD_NAME = "query"; + private List authors; + private String title; + @JsonProperty("last-search-date") + private LocalDate lastSearchDate; + @JsonProperty("research-questions") + private List researchQuestions; + private List queries; + private List databases; - private final BibEntry studyEntry; - private final List queryEntries; - private final List libraryEntries; + public Study(List authors, String title, List researchQuestions, List queryEntries, List databases) { + this.authors = authors; + this.title = title; + this.researchQuestions = researchQuestions; + this.queries = queryEntries; + this.databases = databases; + } - public Study(BibEntry studyEntry, List queryEntries, List libraryEntries) { - this.studyEntry = studyEntry; - this.queryEntries = queryEntries; - this.libraryEntries = libraryEntries; + /** + * Used for Jackson deserialization + */ + public Study() { } - public List getAllEntries() { - List allEntries = new ArrayList<>(); - allEntries.add(studyEntry); - allEntries.addAll(queryEntries); - allEntries.addAll(libraryEntries); - return allEntries; + public List getAuthors() { + return authors; } - /** - * Returns all query strings - * - * @return List of all queries as Strings. - */ - public List getSearchQueryStrings() { - return queryEntries.parallelStream() - .map(bibEntry -> bibEntry.getField(new UnknownField(SEARCH_QUERY_FIELD_NAME))) - .filter(Optional::isPresent) - .map(Optional::get) - .collect(Collectors.toList()); + public void setAuthors(List authors) { + this.authors = authors; } - /** - * This method returns the SearchQuery entries. - * This is required when the BibKey of the search term entry is required in combination with the search query (e.g. - * for the creation of the study repository structure). 
- */ - public List getSearchQueryEntries() { - return queryEntries; + public List getQueries() { + return queries; } - /** - * Returns a meta data entry of the first study entry found in the study definition file of the provided type. - * - * @param metaDataField The type of requested meta-data - * @return returns the requested meta data type of the first found study entry - * @throws IllegalArgumentException If the study file does not contain a study entry. - */ - public Optional getStudyMetaDataField(StudyMetaDataField metaDataField) throws IllegalArgumentException { - return studyEntry.getField(metaDataField.toField()); + public void setQueries(List queries) { + this.queries = queries; + } + + public LocalDate getLastSearchDate() { + return lastSearchDate; } - /** - * Sets the lastSearchDate field of the study entry - * - * @param date date the last time a search was conducted - */ public void setLastSearchDate(LocalDate date) { - studyEntry.setField(StudyMetaDataField.STUDY_LAST_SEARCH.toField(), date.toString()); + lastSearchDate = date; } - /** - * Extracts all active LibraryEntries from the BibEntries. - * - * @return List of BibEntries of type Library - * @throws IllegalArgumentException If a transformation from Library entry to LibraryDefinition fails - */ - public List getActiveLibraryEntries() throws IllegalArgumentException { - return libraryEntries - .parallelStream() - .filter(bibEntry -> { - // If enabled is not defined, the fetcher is active. 
- return bibEntry.getField(new UnknownField("enabled")) - .map(enabled -> enabled.equals("true")) - .orElse(true); - }) - .collect(Collectors.toList()); + public List getDatabases() { + return databases; + } + + public void setDatabases(List databases) { + this.databases = databases; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public List getResearchQuestions() { + return researchQuestions; + } + + public void setResearchQuestions(List researchQuestions) { + this.researchQuestions = researchQuestions; + } + + @Override + public String toString() { + return "Study{" + + "authors=" + authors + + ", studyName='" + title + '\'' + + ", lastSearchDate=" + lastSearchDate + + ", researchQuestions=" + researchQuestions + + ", queries=" + queries + + ", libraries=" + databases + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + Study study = (Study) o; + + if (getAuthors() != null ? !getAuthors().equals(study.getAuthors()) : study.getAuthors() != null) { + return false; + } + if (getTitle() != null ? !getTitle().equals(study.getTitle()) : study.getTitle() != null) { + return false; + } + if (getLastSearchDate() != null ? !getLastSearchDate().equals(study.getLastSearchDate()) : study.getLastSearchDate() != null) { + return false; + } + if (getResearchQuestions() != null ? !getResearchQuestions().equals(study.getResearchQuestions()) : study.getResearchQuestions() != null) { + return false; + } + if (getQueries() != null ? !getQueries().equals(study.getQueries()) : study.getQueries() != null) { + return false; + } + return getDatabases() != null ? 
getDatabases().equals(study.getDatabases()) : study.getDatabases() == null; + } + + @Override + public int hashCode() { + return Objects.hashCode(this); } } diff --git a/src/main/java/org/jabref/model/study/StudyDatabase.java b/src/main/java/org/jabref/model/study/StudyDatabase.java new file mode 100644 index 00000000000..ac71a7160c2 --- /dev/null +++ b/src/main/java/org/jabref/model/study/StudyDatabase.java @@ -0,0 +1,67 @@ +package org.jabref.model.study; + +public class StudyDatabase { + private String name; + private boolean enabled; + + public StudyDatabase(String name, boolean enabled) { + this.name = name; + this.enabled = enabled; + } + + /** + * Used for Jackson deserialization + */ + public StudyDatabase() { + // Per default fetcher is activated + this.enabled = true; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public boolean isEnabled() { + return enabled; + } + + public void setEnabled(boolean enabled) { + this.enabled = enabled; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + StudyDatabase that = (StudyDatabase) o; + + if (isEnabled() != that.isEnabled()) { + return false; + } + return getName() != null ? getName().equals(that.getName()) : that.getName() == null; + } + + @Override + public int hashCode() { + int result = getName() != null ? getName().hashCode() : 0; + result = 31 * result + (isEnabled() ? 
1 : 0); + return result; + } + + @Override + public String toString() { + return "LibraryEntry{" + + "name='" + name + '\'' + + ", enabled=" + enabled + + '}'; + } +} diff --git a/src/main/java/org/jabref/model/study/StudyMetaDataField.java b/src/main/java/org/jabref/model/study/StudyMetaDataField.java deleted file mode 100644 index 6dbea2a2dc8..00000000000 --- a/src/main/java/org/jabref/model/study/StudyMetaDataField.java +++ /dev/null @@ -1,24 +0,0 @@ -package org.jabref.model.study; - -import org.jabref.model.entry.field.Field; -import org.jabref.model.entry.field.StandardField; -import org.jabref.model.entry.field.UnknownField; - -/** - * This enum represents the different fields in the study entry - */ -public enum StudyMetaDataField { - STUDY_NAME(new UnknownField("name")), STUDY_RESEARCH_QUESTIONS(new UnknownField("researchQuestions")), - STUDY_AUTHORS(StandardField.AUTHOR), STUDY_GIT_REPOSITORY(new UnknownField("gitRepositoryURL")), - STUDY_LAST_SEARCH(new UnknownField("lastSearchDate")); - - private final Field field; - - StudyMetaDataField(Field field) { - this.field = field; - } - - public Field toField() { - return this.field; - } -} diff --git a/src/main/java/org/jabref/model/study/StudyQuery.java b/src/main/java/org/jabref/model/study/StudyQuery.java new file mode 100644 index 00000000000..ae5a42b0783 --- /dev/null +++ b/src/main/java/org/jabref/model/study/StudyQuery.java @@ -0,0 +1,50 @@ +package org.jabref.model.study; + +public class StudyQuery { + private String query; + + public StudyQuery(String query) { + this.query = query; + } + + /** + * Used for Jackson deserialization + */ + public StudyQuery() { + + } + + public String getQuery() { + return query; + } + + public void setQuery(String query) { + this.query = query; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + StudyQuery that = (StudyQuery) o; + + return getQuery() != null 
? getQuery().equals(that.getQuery()) : that.getQuery() == null; + } + + @Override + public int hashCode() { + return getQuery() != null ? getQuery().hashCode() : 0; + } + + @Override + public String toString() { + return "QueryEntry{" + + "query='" + query + '\'' + + '}'; + } +} diff --git a/src/test/java/org/jabref/logic/crawler/CrawlerTest.java b/src/test/java/org/jabref/logic/crawler/CrawlerTest.java index dfa6e11ba9c..b07dc48c59f 100644 --- a/src/test/java/org/jabref/logic/crawler/CrawlerTest.java +++ b/src/test/java/org/jabref/logic/crawler/CrawlerTest.java @@ -38,6 +38,9 @@ class CrawlerTest { TimestampPreferences timestampPreferences; BibEntryTypesManager entryTypesManager; GitHandler gitHandler = mock(GitHandler.class, Answers.RETURNS_DEFAULTS); + String hashCodeQuantum = String.valueOf("Quantum".hashCode()); + String hashCodeCloudComputing = String.valueOf("Cloud Computing".hashCode()); + String hashCodeSoftwareEngineering = String.valueOf("\"Software Engineering\"".hashCode()); @Test public void testWhetherAllFilesAreCreated() throws Exception { @@ -53,26 +56,22 @@ public void testWhetherAllFilesAreCreated() throws Exception { testCrawler.performCrawl(); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "1 - Quantum"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "2 - Cloud Computing"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "3 - TestSearchQuery3"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeQuantum + " - Quantum"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeCloudComputing + " - Cloud Computing"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "1 - Quantum", "ArXiv.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "2 - Cloud Computing", "ArXiv.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "3 - TestSearchQuery3", 
"ArXiv.bib"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeQuantum + " - Quantum", "ArXiv.bib"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeCloudComputing + " - Cloud Computing", "ArXiv.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "1 - Quantum", "Springer.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "2 - Cloud Computing", "Springer.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "3 - TestSearchQuery3", "Springer.bib"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeQuantum + " - Quantum", "Springer.bib"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeCloudComputing + " - Cloud Computing", "Springer.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "1 - Quantum", "result.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "2 - Cloud Computing", "result.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "3 - TestSearchQuery3", "result.bib"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeQuantum + " - Quantum", "result.bib"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeCloudComputing + " - Cloud Computing", "result.bib"))); assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "studyResult.bib"))); } private Path getPathToStudyDefinitionFile() { - return tempRepositoryDirectory.resolve("study.bib"); + return tempRepositoryDirectory.resolve("study.yml"); } /** @@ -121,8 +120,8 @@ private void setUpRepository() throws Exception { } private void setUpTestStudyDefinitionFile() throws Exception { - Path destination = tempRepositoryDirectory.resolve("study.bib"); - URL studyDefinition = this.getClass().getResource("study.bib"); + Path destination = 
tempRepositoryDirectory.resolve("study.yml"); + URL studyDefinition = this.getClass().getResource("study.yml"); FileUtil.copyFile(Path.of(studyDefinition.toURI()), destination, false); } } diff --git a/src/test/java/org/jabref/logic/crawler/LibraryEntryToFetcherConverterTest.java b/src/test/java/org/jabref/logic/crawler/StudyDatabaseToFetcherConverterTest.java similarity index 86% rename from src/test/java/org/jabref/logic/crawler/LibraryEntryToFetcherConverterTest.java rename to src/test/java/org/jabref/logic/crawler/StudyDatabaseToFetcherConverterTest.java index 013a9c118a7..f850658fefb 100644 --- a/src/test/java/org/jabref/logic/crawler/LibraryEntryToFetcherConverterTest.java +++ b/src/test/java/org/jabref/logic/crawler/StudyDatabaseToFetcherConverterTest.java @@ -14,7 +14,6 @@ import org.jabref.logic.util.io.FileUtil; import org.jabref.model.entry.BibEntryTypesManager; import org.jabref.model.metadata.SaveOrderConfig; -import org.jabref.model.study.Study; import org.jabref.model.util.DummyFileUpdateMonitor; import org.junit.jupiter.api.Assertions; @@ -26,7 +25,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -class LibraryEntryToFetcherConverterTest { +class StudyDatabaseToFetcherConverterTest { ImportFormatPreferences importFormatPreferences; SavePreferences savePreferences; TimestampPreferences timestampPreferences; @@ -53,11 +52,11 @@ void setUpMocks() { @Test public void getActiveFetcherInstances() throws Exception { - Path studyDefinition = tempRepositoryDirectory.resolve("study.bib"); + Path studyDefinition = tempRepositoryDirectory.resolve("study.yml"); copyTestStudyDefinitionFileIntoDirectory(studyDefinition); - Study study = new StudyRepository(tempRepositoryDirectory, gitHandler, importFormatPreferences, new DummyFileUpdateMonitor(), savePreferences, timestampPreferences, entryTypesManager).getStudy(); - LibraryEntryToFetcherConverter converter = new LibraryEntryToFetcherConverter(study.getActiveLibraryEntries(), 
importFormatPreferences); + StudyRepository studyRepository = new StudyRepository(tempRepositoryDirectory, gitHandler, importFormatPreferences, new DummyFileUpdateMonitor(), savePreferences, timestampPreferences, entryTypesManager); + StudyDatabaseToFetcherConverter converter = new StudyDatabaseToFetcherConverter(studyRepository.getActiveLibraryEntries(), importFormatPreferences); List result = converter.getActiveFetchers(); Assertions.assertEquals(2, result.size()); @@ -66,7 +65,7 @@ public void getActiveFetcherInstances() throws Exception { } private void copyTestStudyDefinitionFileIntoDirectory(Path destination) throws Exception { - URL studyDefinition = this.getClass().getResource("study.bib"); + URL studyDefinition = this.getClass().getResource("study.yml"); FileUtil.copyFile(Path.of(studyDefinition.toURI()), destination, false); } } diff --git a/src/test/java/org/jabref/logic/crawler/StudyRepositoryTest.java b/src/test/java/org/jabref/logic/crawler/StudyRepositoryTest.java index 171fbb70168..8bba87c2a38 100644 --- a/src/test/java/org/jabref/logic/crawler/StudyRepositoryTest.java +++ b/src/test/java/org/jabref/logic/crawler/StudyRepositoryTest.java @@ -9,8 +9,6 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; -import java.util.Objects; -import java.util.stream.Collectors; import org.jabref.logic.bibtex.FieldContentFormatterPreferences; import org.jabref.logic.citationkeypattern.CitationKeyGenerator; @@ -27,13 +25,10 @@ import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.BibEntryTypesManager; import org.jabref.model.entry.field.StandardField; -import org.jabref.model.entry.field.UnknownField; import org.jabref.model.entry.types.StandardEntryType; import org.jabref.model.metadata.SaveOrderConfig; import org.jabref.model.study.FetchResult; import org.jabref.model.study.QueryResult; -import org.jabref.model.study.Study; -import org.jabref.model.study.StudyMetaDataField; import 
org.jabref.model.util.DummyFileUpdateMonitor; import org.junit.jupiter.api.BeforeEach; @@ -59,12 +54,15 @@ class StudyRepositoryTest { Path tempRepositoryDirectory; StudyRepository studyRepository; GitHandler gitHandler = mock(GitHandler.class, Answers.RETURNS_DEFAULTS); + String hashCodeQuantum = String.valueOf("Quantum".hashCode()); + String hashCodeCloudComputing = String.valueOf("Cloud Computing".hashCode()); + String hashCodeSoftwareEngineering = String.valueOf("\"Software Engineering\"".hashCode()); /** * Set up mocks */ @BeforeEach - public void setUpMocks() { + public void setUpMocks() throws Exception { savePreferences = mock(SavePreferences.class, Answers.RETURNS_DEEP_STUBS); importFormatPreferences = mock(ImportFormatPreferences.class, Answers.RETURNS_DEEP_STUBS); timestampPreferences = mock(TimestampPreferences.class); @@ -88,6 +86,7 @@ public void setUpMocks() { when(importFormatPreferences.getEncoding()).thenReturn(StandardCharsets.UTF_8); when(timestampPreferences.getTimestampField()).then(invocation -> StandardField.TIMESTAMP); entryTypesManager = new BibEntryTypesManager(); + getTestStudyRepository(); } @Test @@ -97,54 +96,25 @@ void providePathToNonExistentRepositoryThrowsException() { assertThrows(IOException.class, () -> new StudyRepository(nonExistingRepositoryDirectory, gitHandler, importFormatPreferences, new DummyFileUpdateMonitor(), savePreferences, timestampPreferences, entryTypesManager)); } - @Test - void providePathToExistentRepositoryWithOutStudyDefinitionFileThrowsException() { - assertThrows(IOException.class, () -> new StudyRepository(tempRepositoryDirectory, gitHandler, importFormatPreferences, new DummyFileUpdateMonitor(), savePreferences, timestampPreferences, entryTypesManager)); - } - - /** - * Tests whether the StudyRepository correctly imports the study file. 
- */ - @Test - void studyFileCorrectlyImported() throws Exception { - setUpTestStudyDefinitionFile(); - List expectedSearchterms = List.of("Quantum", "Cloud Computing", "TestSearchQuery3"); - List expectedActiveFetchersByName = List.of("Springer", "ArXiv"); - - Study study = new StudyRepository(tempRepositoryDirectory, gitHandler, importFormatPreferences, new DummyFileUpdateMonitor(), savePreferences, timestampPreferences, entryTypesManager).getStudy(); - - assertEquals(expectedSearchterms, study.getSearchQueryStrings()); - assertEquals("TestStudyName", study.getStudyMetaDataField(StudyMetaDataField.STUDY_NAME).get()); - assertEquals("Jab Ref", study.getStudyMetaDataField(StudyMetaDataField.STUDY_AUTHORS).get()); - assertEquals("Question1; Question2", study.getStudyMetaDataField(StudyMetaDataField.STUDY_RESEARCH_QUESTIONS).get()); - assertEquals(expectedActiveFetchersByName, study.getActiveLibraryEntries() - .stream() - .filter(bibEntry -> bibEntry.getType().getName().equals("library")) - .map(bibEntry -> bibEntry.getField(new UnknownField("name")).orElse("")) - .collect(Collectors.toList()) - ); - } - /** * Tests whether the file structure of the repository is created correctly from the study definitions file. 
*/ @Test void repositoryStructureCorrectlyCreated() throws Exception { - // When repository is instantiated the directory structure is created - getTestStudyRepository(); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "1 - Quantum"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "2 - Cloud Computing"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "3 - TestSearchQuery3"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "1 - Quantum", "ArXiv.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "2 - Cloud Computing", "ArXiv.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "3 - TestSearchQuery3", "ArXiv.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "1 - Quantum", "Springer.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "2 - Cloud Computing", "Springer.bib"))); - assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), "3 - TestSearchQuery3", "Springer.bib"))); - assertTrue(Files.notExists(Path.of(tempRepositoryDirectory.toString(), "1 - Quantum", "IEEEXplore.bib"))); - assertTrue(Files.notExists(Path.of(tempRepositoryDirectory.toString(), "2 - Cloud Computing", "IEEEXplore.bib"))); - assertTrue(Files.notExists(Path.of(tempRepositoryDirectory.toString(), "3 - TestSearchQuery3", "IEEEXplore.bib"))); + // When repository is instantiated the directory structure is created + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeQuantum + " - Quantum"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeCloudComputing + " - Cloud Computing"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeSoftwareEngineering + " - Software Engineering"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeQuantum + " - Quantum", "ArXiv.bib"))); + 
assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeCloudComputing + " - Cloud Computing", "ArXiv.bib"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeSoftwareEngineering + " - Software Engineering", "ArXiv.bib"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeQuantum + " - Quantum", "Springer.bib"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeCloudComputing + " - Cloud Computing", "Springer.bib"))); + assertTrue(Files.exists(Path.of(tempRepositoryDirectory.toString(), hashCodeSoftwareEngineering + " - Software Engineering", "Springer.bib"))); + assertTrue(Files.notExists(Path.of(tempRepositoryDirectory.toString(), hashCodeQuantum + " - Quantum", "IEEEXplore.bib"))); + assertTrue(Files.notExists(Path.of(tempRepositoryDirectory.toString(), hashCodeCloudComputing + " - Cloud Computing", "IEEEXplore.bib"))); + assertTrue(Files.notExists(Path.of(tempRepositoryDirectory.toString(), hashCodeSoftwareEngineering + " - Software Engineering", "IEEEXplore.bib"))); } /** @@ -152,9 +122,8 @@ void repositoryStructureCorrectlyCreated() throws Exception { */ @Test void bibEntriesCorrectlyStored() throws Exception { - StudyRepository repository = getTestStudyRepository(); setUpTestResultFile(); - List result = repository.getFetcherResultEntries("Quantum", "ArXiv").getEntries(); + List result = studyRepository.getFetcherResultEntries("Quantum", "ArXiv").getEntries(); assertEquals(getArXivQuantumMockResults(), result); } @@ -162,7 +131,7 @@ void bibEntriesCorrectlyStored() throws Exception { void fetcherResultsPersistedCorrectly() throws Exception { List mockResults = getMockResults(); - getTestStudyRepository().persist(mockResults); + studyRepository.persist(mockResults); assertEquals(getArXivQuantumMockResults(), getTestStudyRepository().getFetcherResultEntries("Quantum", "ArXiv").getEntries()); assertEquals(getSpringerQuantumMockResults(), 
getTestStudyRepository().getFetcherResultEntries("Quantum", "Springer").getEntries()); @@ -177,7 +146,7 @@ void mergedResultsPersistedCorrectly() throws Exception { expected.add(getSpringerQuantumMockResults().get(1)); expected.add(getSpringerQuantumMockResults().get(2)); - getTestStudyRepository().persist(mockResults); + studyRepository.persist(mockResults); // All Springer results are duplicates for "Quantum" assertEquals(expected, getTestStudyRepository().getQueryResultEntries("Quantum").getEntries()); @@ -188,25 +157,23 @@ void mergedResultsPersistedCorrectly() throws Exception { void setsLastSearchDatePersistedCorrectly() throws Exception { List mockResults = getMockResults(); - getTestStudyRepository().persist(mockResults); + studyRepository.persist(mockResults); - assertEquals(LocalDate.now().toString(), getTestStudyRepository().getStudy().getStudyMetaDataField(StudyMetaDataField.STUDY_LAST_SEARCH).get()); + assertEquals(LocalDate.now(), getTestStudyRepository().getStudy().getLastSearchDate()); } @Test void studyResultsPersistedCorrectly() throws Exception { List mockResults = getMockResults(); - getTestStudyRepository().persist(mockResults); + studyRepository.persist(mockResults); assertEquals(new HashSet<>(getNonDuplicateBibEntryResult().getEntries()), new HashSet<>(getTestStudyRepository().getStudyResultEntries().getEntries())); } private StudyRepository getTestStudyRepository() throws Exception { - if (Objects.isNull(studyRepository)) { - setUpTestStudyDefinitionFile(); - studyRepository = new StudyRepository(tempRepositoryDirectory, gitHandler, importFormatPreferences, new DummyFileUpdateMonitor(), savePreferences, timestampPreferences, entryTypesManager); - } + setUpTestStudyDefinitionFile(); + studyRepository = new StudyRepository(tempRepositoryDirectory, gitHandler, importFormatPreferences, new DummyFileUpdateMonitor(), savePreferences, timestampPreferences, entryTypesManager); return studyRepository; } @@ -214,8 +181,8 @@ private StudyRepository 
getTestStudyRepository() throws Exception { * Copies the study definition file into the test repository */ private void setUpTestStudyDefinitionFile() throws Exception { - Path destination = tempRepositoryDirectory.resolve("study.bib"); - URL studyDefinition = this.getClass().getResource("study.bib"); + Path destination = tempRepositoryDirectory.resolve("study.yml"); + URL studyDefinition = this.getClass().getResource("study.yml"); FileUtil.copyFile(Path.of(studyDefinition.toURI()), destination, false); } @@ -224,7 +191,7 @@ private void setUpTestStudyDefinitionFile() throws Exception { * The repository has to exist before this method is called. */ private void setUpTestResultFile() throws Exception { - Path queryDirectory = Path.of(tempRepositoryDirectory.toString(), "1 - Quantum"); + Path queryDirectory = Path.of(tempRepositoryDirectory.toString(), hashCodeQuantum + " - Quantum"); Path resultFileLocation = Path.of(queryDirectory.toString(), "ArXiv" + ".bib"); URL resultFile = this.getClass().getResource("ArXivQuantumMock.bib"); FileUtil.copyFile(Path.of(resultFile.toURI()), resultFileLocation, true); diff --git a/src/test/java/org/jabref/logic/crawler/StudyYamlParserTest.java b/src/test/java/org/jabref/logic/crawler/StudyYamlParserTest.java new file mode 100644 index 00000000000..8fbd419f01f --- /dev/null +++ b/src/test/java/org/jabref/logic/crawler/StudyYamlParserTest.java @@ -0,0 +1,55 @@ +package org.jabref.logic.crawler; + +import java.net.URL; +import java.nio.file.Path; +import java.time.LocalDate; +import java.util.List; + +import org.jabref.logic.util.io.FileUtil; +import org.jabref.model.study.Study; +import org.jabref.model.study.StudyDatabase; +import org.jabref.model.study.StudyQuery; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class StudyYamlParserTest { + @TempDir + static Path testDirectory; + Study 
expectedStudy; + + @BeforeEach + void setupStudy() throws Exception { + Path destination = testDirectory.resolve("study.yml"); + URL studyDefinition = StudyYamlParser.class.getResource("study.yml"); + FileUtil.copyFile(Path.of(studyDefinition.toURI()), destination, true); + + List authors = List.of("Jab Ref"); + String studyName = "TestStudyName"; + List researchQuestions = List.of("Question1", "Question2"); + List queryEntries = List.of(new StudyQuery("Quantum"), new StudyQuery("Cloud Computing"), new StudyQuery("\"Software Engineering\"")); + List libraryEntries = List.of(new StudyDatabase("Springer", true), new StudyDatabase("ArXiv", true), new StudyDatabase("IEEEXplore", false)); + + expectedStudy = new Study(authors, studyName, researchQuestions, queryEntries, libraryEntries); + expectedStudy.setLastSearchDate(LocalDate.parse("2020-11-26")); + } + + @Test + public void parseStudyFileSuccessfully() throws Exception { + Study study = new StudyYamlParser().parseStudyYamlFile(testDirectory.resolve("study.yml")); + + assertEquals(expectedStudy, study); + } + + @Test + public void writeStudyFileSuccessfully() throws Exception { + new StudyYamlParser().writeStudyYamlFile(expectedStudy, testDirectory.resolve("study.yml")); + + Study study = new StudyYamlParser().parseStudyYamlFile(testDirectory.resolve("study.yml")); + + assertEquals(expectedStudy, study); + } +} diff --git a/src/test/java/org/jabref/model/study/StudyTest.java b/src/test/java/org/jabref/model/study/StudyTest.java deleted file mode 100644 index 9ab34fcd55e..00000000000 --- a/src/test/java/org/jabref/model/study/StudyTest.java +++ /dev/null @@ -1,94 +0,0 @@ -package org.jabref.model.study; - -import java.time.LocalDate; -import java.util.List; - -import org.jabref.model.entry.BibEntry; -import org.jabref.model.entry.field.StandardField; -import org.jabref.model.entry.field.UnknownField; -import org.jabref.model.entry.types.SystematicLiteratureReviewStudyEntryType; - -import 
org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -public class StudyTest { - Study testStudy; - - @BeforeEach - public void setUpTestStudy() { - BibEntry studyEntry = new BibEntry() - .withField(new UnknownField("name"), "TestStudyName") - .withField(StandardField.AUTHOR, "Jab Ref") - .withField(new UnknownField("researchQuestions"), "Question1; Question2") - .withField(new UnknownField("gitRepositoryURL"), "https://github.com/eclipse/jgit.git"); - studyEntry.setType(SystematicLiteratureReviewStudyEntryType.STUDY_ENTRY); - - // Create three SearchTerm entries. - BibEntry searchQuery1 = new BibEntry() - .withField(new UnknownField("query"), "TestSearchQuery1"); - searchQuery1.setType(SystematicLiteratureReviewStudyEntryType.SEARCH_QUERY_ENTRY); - searchQuery1.setCitationKey("query1"); - - BibEntry searchQuery2 = new BibEntry() - .withField(new UnknownField("query"), "TestSearchQuery2"); - searchQuery2.setType(SystematicLiteratureReviewStudyEntryType.SEARCH_QUERY_ENTRY); - searchQuery2.setCitationKey("query2"); - - BibEntry searchQuery3 = new BibEntry() - .withField(new UnknownField("query"), "TestSearchQuery3"); - searchQuery3.setType(SystematicLiteratureReviewStudyEntryType.SEARCH_QUERY_ENTRY); - searchQuery3.setCitationKey("query3"); - - // Create two Library entries - BibEntry library1 = new BibEntry() - .withField(new UnknownField("name"), "acm") - .withField(new UnknownField("enabled"), "false") - .withField(new UnknownField("comment"), "disabled, because no good results"); - library1.setType(SystematicLiteratureReviewStudyEntryType.LIBRARY_ENTRY); - library1.setCitationKey("library1"); - - BibEntry library2 = new BibEntry() - .withField(new UnknownField("name"), "arxiv") - .withField(new UnknownField("enabled"), "true") - .withField(new UnknownField("Comment"), ""); - library2.setType(SystematicLiteratureReviewStudyEntryType.LIBRARY_ENTRY); - 
library2.setCitationKey("library2"); - - testStudy = new Study(studyEntry, List.of(searchQuery1, searchQuery2, searchQuery3), List.of(library1, library2)); - } - - @Test - void getSearchTermsAsStrings() { - List expectedSearchTerms = List.of("TestSearchQuery1", "TestSearchQuery2", "TestSearchQuery3"); - assertEquals(expectedSearchTerms, testStudy.getSearchQueryStrings()); - } - - @Test - void setLastSearchTime() { - LocalDate date = LocalDate.now(); - testStudy.setLastSearchDate(date); - assertEquals(date.toString(), testStudy.getStudyMetaDataField(StudyMetaDataField.STUDY_LAST_SEARCH).get()); - } - - @Test - void getStudyName() { - assertEquals("TestStudyName", testStudy.getStudyMetaDataField(StudyMetaDataField.STUDY_NAME).get()); - } - - @Test - void getStudyAuthor() { - assertEquals("Jab Ref", testStudy.getStudyMetaDataField(StudyMetaDataField.STUDY_AUTHORS).get()); - } - - @Test - void getResearchQuestions() { - assertEquals("Question1; Question2", testStudy.getStudyMetaDataField(StudyMetaDataField.STUDY_RESEARCH_QUESTIONS).get()); - } - - @Test - void getGitRepositoryURL() { - assertEquals("https://github.com/eclipse/jgit.git", testStudy.getStudyMetaDataField(StudyMetaDataField.STUDY_GIT_REPOSITORY).get()); - } -} diff --git a/src/test/resources/org/jabref/logic/crawler/study.bib b/src/test/resources/org/jabref/logic/crawler/study.bib deleted file mode 100644 index 3f9809a82e5..00000000000 --- a/src/test/resources/org/jabref/logic/crawler/study.bib +++ /dev/null @@ -1,37 +0,0 @@ -% Encoding: UTF-8 - -@Study{v10, - name={TestStudyName}, - author={Jab Ref}, - researchQuestions={Question1; Question2}, -} - -@SearchQuery{query1, - query={Quantum}, -} - -@SearchQuery{query2, - query={Cloud Computing}, -} - -@SearchQuery{query3, - query={TestSearchQuery3}, -} - -@Library{library1, - name = {Springer}, - enabled = {true}, - comment = {}, -} - -@Library{library2, - name = {ArXiv}, - enabled = {true}, - comment = {}, -} - -@Library{library3, - name = {IEEEXplore}, - 
enabled = {false}, - comment = {}, -} diff --git a/src/test/resources/org/jabref/logic/crawler/study.yml b/src/test/resources/org/jabref/logic/crawler/study.yml new file mode 100644 index 00000000000..620edaf0eab --- /dev/null +++ b/src/test/resources/org/jabref/logic/crawler/study.yml @@ -0,0 +1,16 @@ +authors: + - Jab Ref +title: TestStudyName +last-search-date: 2020-11-26 +research-questions: + - Question1 + - Question2 +queries: + - query: Quantum + - query: Cloud Computing + - query: '"Software Engineering"' +databases: + - name: Springer + - name: ArXiv + - name: IEEEXplore + enabled: false From 034cf8c975f2ba77ea2a283cb2301d2ef40cde3a Mon Sep 17 00:00:00 2001 From: Dominik Voigt Date: Tue, 26 Jan 2021 23:43:31 +0100 Subject: [PATCH 04/15] Feature/implement complex queries (#7350) --- .../importer/PagedSearchBasedFetcher.java | 49 +++-- .../PagedSearchBasedParserFetcher.java | 37 +--- .../logic/importer/SearchBasedFetcher.java | 30 ++- .../importer/SearchBasedParserFetcher.java | 20 +- .../importer/fetcher/ACMPortalFetcher.java | 11 +- .../jabref/logic/importer/fetcher/ArXiv.java | 35 +-- .../fetcher/AstrophysicsDataSystem.java | 14 +- .../logic/importer/fetcher/CiteSeer.java | 6 +- ...fComputerScienceBibliographiesFetcher.java | 6 +- .../fetcher/CompositeSearchBasedFetcher.java | 25 +-- .../logic/importer/fetcher/CrossRef.java | 6 +- .../logic/importer/fetcher/DBLPFetcher.java | 6 +- .../logic/importer/fetcher/DOAJFetcher.java | 6 +- .../logic/importer/fetcher/GoogleScholar.java | 30 +-- .../fetcher/GrobidCitationFetcher.java | 45 ++-- .../logic/importer/fetcher/GvkFetcher.java | 41 +--- .../jabref/logic/importer/fetcher/IEEE.java | 54 ++--- .../importer/fetcher/INSPIREFetcher.java | 6 +- .../logic/importer/fetcher/JstorFetcher.java | 41 +--- .../logic/importer/fetcher/MathSciNet.java | 6 +- .../importer/fetcher/MedlineFetcher.java | 57 ++--- .../importer/fetcher/SpringerFetcher.java | 12 +- .../jabref/logic/importer/fetcher/ZbMATH.java | 6 +- 
.../AbstractQueryTransformer.java | 199 ++++++++++++++++++ .../transformators/ArXivQueryTransformer.java | 77 +++++++ .../transformators/DBLPQueryTransformer.java | 66 ++++++ .../DefaultQueryTransformer.java | 49 +++++ .../transformators/GVKQueryTransformer.java | 67 ++++++ .../transformators/IEEEQueryTransformer.java | 96 +++++++++ .../transformators/JstorQueryTransformer.java | 49 +++++ .../ScholarQueryTransformer.java | 65 ++++++ .../SpringerQueryTransformer.java | 62 ++++++ .../ZbMathQueryTransformer.java | 49 +++++ .../logic/importer/QueryParserTest.java | 2 +- .../logic/importer/fetcher/ArXivTest.java | 4 +- ...puterScienceBibliographiesFetcherTest.java | 9 +- .../CompositeSearchBasedFetcherTest.java | 4 +- .../importer/fetcher/DBLPFetcherTest.java | 3 +- .../fetcher/GrobidCitationFetcherTest.java | 3 +- .../importer/fetcher/GvkFetcherTest.java | 35 ++- .../logic/importer/fetcher/IEEETest.java | 29 +-- .../importer/fetcher/INSPIREFetcherTest.java | 2 +- .../SearchBasedFetcherCapabilityTest.java | 22 +- .../importer/fetcher/SpringerFetcherTest.java | 4 +- .../ArXivQueryTransformerTest.java | 60 ++++++ .../DBLPQueryTransformerTest.java | 54 +++++ .../GVKQueryTransformerTest.java | 54 +++++ .../IEEEQueryTransformerTest.java | 70 ++++++ .../transformators/InfixTransformerTest.java | 97 +++++++++ .../JstorQueryTransformerTest.java | 39 ++++ .../ScholarQueryTransformerTest.java | 59 ++++++ .../SpringerQueryTransformerTest.java | 56 +++++ .../ZbMathQueryTransformerTest.java | 54 +++++ 53 files changed, 1617 insertions(+), 371 deletions(-) create mode 100644 src/main/java/org/jabref/logic/importer/fetcher/transformators/AbstractQueryTransformer.java create mode 100644 src/main/java/org/jabref/logic/importer/fetcher/transformators/ArXivQueryTransformer.java create mode 100644 src/main/java/org/jabref/logic/importer/fetcher/transformators/DBLPQueryTransformer.java create mode 100644 
src/main/java/org/jabref/logic/importer/fetcher/transformators/DefaultQueryTransformer.java create mode 100644 src/main/java/org/jabref/logic/importer/fetcher/transformators/GVKQueryTransformer.java create mode 100644 src/main/java/org/jabref/logic/importer/fetcher/transformators/IEEEQueryTransformer.java create mode 100644 src/main/java/org/jabref/logic/importer/fetcher/transformators/JstorQueryTransformer.java create mode 100644 src/main/java/org/jabref/logic/importer/fetcher/transformators/ScholarQueryTransformer.java create mode 100644 src/main/java/org/jabref/logic/importer/fetcher/transformators/SpringerQueryTransformer.java create mode 100644 src/main/java/org/jabref/logic/importer/fetcher/transformators/ZbMathQueryTransformer.java create mode 100644 src/test/java/org/jabref/logic/importer/fetcher/transformators/ArXivQueryTransformerTest.java create mode 100644 src/test/java/org/jabref/logic/importer/fetcher/transformators/DBLPQueryTransformerTest.java create mode 100644 src/test/java/org/jabref/logic/importer/fetcher/transformators/GVKQueryTransformerTest.java create mode 100644 src/test/java/org/jabref/logic/importer/fetcher/transformators/IEEEQueryTransformerTest.java create mode 100644 src/test/java/org/jabref/logic/importer/fetcher/transformators/InfixTransformerTest.java create mode 100644 src/test/java/org/jabref/logic/importer/fetcher/transformators/JstorQueryTransformerTest.java create mode 100644 src/test/java/org/jabref/logic/importer/fetcher/transformators/ScholarQueryTransformerTest.java create mode 100644 src/test/java/org/jabref/logic/importer/fetcher/transformators/SpringerQueryTransformerTest.java create mode 100644 src/test/java/org/jabref/logic/importer/fetcher/transformators/ZbMathQueryTransformerTest.java diff --git a/src/main/java/org/jabref/logic/importer/PagedSearchBasedFetcher.java b/src/main/java/org/jabref/logic/importer/PagedSearchBasedFetcher.java index ea547522049..a6b1cd4aea1 100644 --- 
a/src/main/java/org/jabref/logic/importer/PagedSearchBasedFetcher.java +++ b/src/main/java/org/jabref/logic/importer/PagedSearchBasedFetcher.java @@ -3,34 +3,40 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Optional; -import org.jabref.logic.importer.fetcher.ComplexSearchQuery; import org.jabref.model.entry.BibEntry; import org.jabref.model.paging.Page; +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; + public interface PagedSearchBasedFetcher extends SearchBasedFetcher { /** - * @param complexSearchQuery the complex query defining all fielded search parameters - * @param pageNumber requested site number indexed from 0 + * @param luceneQuery the root node of the lucene query + * @param pageNumber requested site number indexed from 0 * @return Page with search results */ - Page performSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException; + Page performSearchPaged(QueryNode luceneQuery, int pageNumber) throws FetcherException; /** - * @param complexSearchQuery query string that can be parsed into a complex search query - * @param pageNumber requested site number indexed from 0 + * @param searchQuery query string that can be parsed into a lucene query + * @param pageNumber requested site number indexed from 0 * @return Page with search results */ - default Page performSearchPaged(String complexSearchQuery, int pageNumber) throws FetcherException { - if (complexSearchQuery.isBlank()) { - return new Page<>(complexSearchQuery, pageNumber, Collections.emptyList()); + default Page performSearchPaged(String searchQuery, int pageNumber) throws FetcherException { + if (searchQuery.isBlank()) { + return new Page<>(searchQuery, 
pageNumber, Collections.emptyList()); + } + SyntaxParser parser = new StandardSyntaxParser(); + final String NO_EXPLICIT_FIELD = "default"; + try { + return this.performSearchPaged(parser.parse(searchQuery, NO_EXPLICIT_FIELD), pageNumber); + } catch (QueryNodeParseException e) { + throw new FetcherException("An error occurred during parsing of the query."); } - QueryParser queryParser = new QueryParser(); - Optional generatedQuery = queryParser.parseQueryStringIntoComplexQuery(complexSearchQuery); - // Otherwise just use query as a default term - return this.performSearchPaged(generatedQuery.orElse(ComplexSearchQuery.builder().defaultFieldPhrase(complexSearchQuery).build()), pageNumber); } /** @@ -40,13 +46,14 @@ default int getPageSize() { return 20; } - @Override - default List performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException { - return new ArrayList<>(performSearchPaged(complexSearchQuery, 0).getContent()); + /** + * This method is used to send complex queries using fielded search. 
+ * + * @param luceneQuery the root node of the lucene query + * @return a list of {@link BibEntry}, which are matched by the query (may be empty) + */ + default List performSearch(QueryNode luceneQuery) throws FetcherException { + return new ArrayList<>(performSearchPaged(luceneQuery, 0).getContent()); } - @Override - default List performSearch(String complexSearchQuery) throws FetcherException { - return new ArrayList<>(performSearchPaged(complexSearchQuery, 0).getContent()); - } } diff --git a/src/main/java/org/jabref/logic/importer/PagedSearchBasedParserFetcher.java b/src/main/java/org/jabref/logic/importer/PagedSearchBasedParserFetcher.java index 7f7b3380f0b..bbbc848cc99 100644 --- a/src/main/java/org/jabref/logic/importer/PagedSearchBasedParserFetcher.java +++ b/src/main/java/org/jabref/logic/importer/PagedSearchBasedParserFetcher.java @@ -7,22 +7,23 @@ import java.net.URL; import java.util.List; -import org.jabref.logic.importer.fetcher.ComplexSearchQuery; import org.jabref.model.entry.BibEntry; import org.jabref.model.paging.Page; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; + public interface PagedSearchBasedParserFetcher extends SearchBasedParserFetcher, PagedSearchBasedFetcher { @Override - default Page performSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException { + default Page performSearchPaged(QueryNode luceneQuery, int pageNumber) throws FetcherException { // ADR-0014 URL urlForQuery; try { - urlForQuery = getComplexQueryURL(complexSearchQuery, pageNumber); + urlForQuery = getURLForQuery(luceneQuery, pageNumber); } catch (URISyntaxException | MalformedURLException e) { throw new FetcherException("Search URI crafted from complex search query is malformed", e); } - return new Page<>(complexSearchQuery.toString(), pageNumber, getBibEntries(urlForQuery)); + return new Page<>(luceneQuery.toString(), pageNumber, getBibEntries(urlForQuery)); } private List getBibEntries(URL urlForQuery) throws 
FetcherException { @@ -39,34 +40,18 @@ private List getBibEntries(URL urlForQuery) throws FetcherException { /** * Constructs a URL based on the query, size and page number. - * - * @param query the search query + * @param luceneQuery the search query * @param pageNumber the number of the page indexed from 0 */ - URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException; - - /** - * Constructs a URL based on the query, size and page number. - * - * @param complexSearchQuery the search query - * @param pageNumber the number of the page indexed from 0 - */ - default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery, int pageNumber) throws URISyntaxException, MalformedURLException { - return getURLForQuery(complexSearchQuery.toString(), pageNumber); - } - - @Override - default List performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException { - return SearchBasedParserFetcher.super.performSearch(complexSearchQuery); - } + URL getURLForQuery(QueryNode luceneQuery, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException; @Override - default URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { - return getURLForQuery(query, 0); + default URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { + return getURLForQuery(luceneQuery, 0); } @Override - default URL getURLForQuery(ComplexSearchQuery query) throws URISyntaxException, MalformedURLException, FetcherException { - return getComplexQueryURL(query, 0); + default List performSearch(QueryNode luceneQuery) throws FetcherException { + return SearchBasedParserFetcher.super.performSearch(luceneQuery); } } diff --git a/src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java b/src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java index faeb4ffa6f6..8b860c3eeeb 100644 --- 
a/src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java +++ b/src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java @@ -2,11 +2,16 @@ import java.util.Collections; import java.util.List; -import java.util.Optional; -import org.jabref.logic.importer.fetcher.ComplexSearchQuery; import org.jabref.model.entry.BibEntry; +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; + +import static org.jabref.logic.importer.fetcher.transformators.AbstractQueryTransformer.NO_EXPLICIT_FIELD; + /** * Searches web resources for bibliographic information based on a free-text query. * May return multiple search hits. @@ -16,24 +21,27 @@ public interface SearchBasedFetcher extends WebFetcher { /** * This method is used to send complex queries using fielded search. * - * @param complexSearchQuery the complex search query defining all fielded search parameters + * @param luceneQuery the root node of the lucene query * @return a list of {@link BibEntry}, which are matched by the query (may be empty) */ - List performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException; + List performSearch(QueryNode luceneQuery) throws FetcherException; /** * Looks for hits which are matched by the given free-text query. 
* - * @param complexSearchQuery query string that can be parsed into a complex search query + * @param searchQuery query string that can be parsed into a lucene query * @return a list of {@link BibEntry}, which are matched by the query (may be empty) */ - default List performSearch(String complexSearchQuery) throws FetcherException { - if (complexSearchQuery.isBlank()) { + default List performSearch(String searchQuery) throws FetcherException { + if (searchQuery.isBlank()) { return Collections.emptyList(); } - QueryParser queryParser = new QueryParser(); - Optional generatedQuery = queryParser.parseQueryStringIntoComplexQuery(complexSearchQuery); - // Otherwise just use query as a default term - return this.performSearch(generatedQuery.orElse(ComplexSearchQuery.builder().defaultFieldPhrase(complexSearchQuery).build())); + SyntaxParser parser = new StandardSyntaxParser(); + + try { + return this.performSearch(parser.parse(searchQuery, NO_EXPLICIT_FIELD)); + } catch (QueryNodeParseException e) { + throw new FetcherException("An error occured when parsing the query"); + } } } diff --git a/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java b/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java index d0817ade6a0..9aadba697b9 100644 --- a/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java +++ b/src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java @@ -8,9 +8,10 @@ import java.util.List; import org.jabref.logic.cleanup.Formatter; -import org.jabref.logic.importer.fetcher.ComplexSearchQuery; import org.jabref.model.entry.BibEntry; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; + /** * Provides a convenient interface for search-based fetcher, which follow the usual three-step procedure: *

    @@ -26,14 +27,14 @@ public interface SearchBasedParserFetcher extends SearchBasedFetcher { * This method is necessary as the performSearch method does not support certain URL parameters that are used for * fielded search, such as a title, author, or year parameter. * - * @param complexSearchQuery the search query defining all fielded search parameters + * @param luceneQuery the root node of the lucene query */ @Override - default List performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException { + default List performSearch(QueryNode luceneQuery) throws FetcherException { // ADR-0014 URL urlForQuery; try { - urlForQuery = getURLForQuery(complexSearchQuery); + urlForQuery = getURLForQuery(luceneQuery); } catch (URISyntaxException | MalformedURLException | FetcherException e) { throw new FetcherException("Search URI crafted from complex search query is malformed", e); } @@ -52,22 +53,17 @@ private List getBibEntries(URL urlForQuery) throws FetcherException { } } - default URL getURLForQuery(ComplexSearchQuery query) throws URISyntaxException, MalformedURLException, FetcherException { - // Default implementation behaves as getURLForQuery treating complex query as plain string query - return this.getURLForQuery(query.toString()); - } - /** * Returns the parser used to convert the response to a list of {@link BibEntry}. */ Parser getParser(); /** - * Constructs a URL based on the query. + * Constructs a URL based on the lucene query. * - * @param query the search query + * @param luceneQuery the root node of the lucene query */ - URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException; + URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException; /** * Performs a cleanup of the fetched entry. 
diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ACMPortalFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/ACMPortalFetcher.java index e36f730366b..d15e555e461 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/ACMPortalFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/ACMPortalFetcher.java @@ -11,10 +11,12 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; +import org.jabref.logic.importer.fetcher.transformators.DefaultQueryTransformer; import org.jabref.logic.importer.fileformat.BibtexParser; import org.jabref.model.util.DummyFileUpdateMonitor; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; public class ACMPortalFetcher implements SearchBasedParserFetcher { @@ -36,15 +38,16 @@ public Optional getHelpPage() { return Optional.of(HelpFile.FETCHER_ACM); } - private static String createQueryString(String query) { + private static String createQueryString(QueryNode query) throws FetcherException { + String queryString = new DefaultQueryTransformer().transformLuceneQuery(query).orElse(""); // Query syntax to search for an entry that matches "one" and "two" in any field is: (+one +two) - return "(%252B" + query.trim().replaceAll("\\s+", "%20%252B") + ")"; + return "(%252B" + queryString.trim().replaceAll("\\s+", "%20%252B") + ")"; } @Override - public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { + public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder(SEARCH_URL); - uriBuilder.addParameter("query", createQueryString(query)); // Search all fields + uriBuilder.addParameter("query", createQueryString(luceneQuery)); // Search all fields uriBuilder.addParameter("within", 
"owners.owner=GUIDE"); // Search within the ACM Guide to Computing Literature (encompasses the ACM Full-Text Collection) uriBuilder.addParameter("expformat", "bibtex"); // BibTeX format return uriBuilder.build().toURL(); diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java b/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java index 9b640cdd7f0..e0569146721 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java @@ -23,6 +23,7 @@ import org.jabref.logic.importer.IdFetcher; import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.PagedSearchBasedFetcher; +import org.jabref.logic.importer.fetcher.transformators.ArXivQueryTransformer; import org.jabref.logic.util.io.XMLUtil; import org.jabref.logic.util.strings.StringSimilarity; import org.jabref.model.entry.BibEntry; @@ -36,6 +37,7 @@ import org.jabref.model.util.OptionalUtil; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; @@ -252,25 +254,26 @@ public Optional getHelpPage() { /** * Constructs a complex query string using the field prefixes specified at https://arxiv.org/help/api/user-manual * - * @param complexSearchQuery the search query defining all fielded search parameters + * @param luceneQuery the root node of the lucene query * @return A list of entries matching the complex query */ @Override - public Page performSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException { - List searchTerms = new ArrayList<>(); - complexSearchQuery.getAuthors().forEach(author -> searchTerms.add("au:" + author)); - complexSearchQuery.getTitlePhrases().forEach(title -> searchTerms.add("ti:" + title)); - complexSearchQuery.getAbstractPhrases().forEach(abstr -> searchTerms.add("abs:" + abstr)); - 
complexSearchQuery.getJournal().ifPresent(journal -> searchTerms.add("jr:" + journal)); - // Since ArXiv API does not support year search, we ignore the year related terms - complexSearchQuery.getToYear().ifPresent(year -> searchTerms.add(year.toString())); - searchTerms.addAll(complexSearchQuery.getDefaultFieldPhrases()); - String complexQueryString = String.join(" AND ", searchTerms); - - List searchResult = searchForEntries(complexQueryString, pageNumber).stream() - .map((arXivEntry) -> arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator())) - .collect(Collectors.toList()); - return new Page<>(complexQueryString, pageNumber, searchResult); + public Page performSearchPaged(QueryNode luceneQuery, int pageNumber) throws FetcherException { + ArXivQueryTransformer transformer = new ArXivQueryTransformer(); + String transformedQuery = transformer.transformLuceneQuery(luceneQuery).orElse(""); + List searchResult = searchForEntries(transformedQuery, pageNumber).stream() + .map((arXivEntry) -> arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator())) + .collect(Collectors.toList()); + return new Page<>(transformedQuery, pageNumber, filterYears(searchResult, transformer)); + } + + private List filterYears(List searchResult, ArXivQueryTransformer transformer) { + return searchResult.stream() + .filter(entry -> entry.getField(StandardField.DATE).isPresent()) + // Filter the date field for year only + .filter(entry -> transformer.getEndYear().isEmpty() || Integer.parseInt(entry.getField(StandardField.DATE).get().substring(0, 4)) <= transformer.getEndYear().get()) + .filter(entry -> transformer.getStartYear().isEmpty() || Integer.parseInt(entry.getField(StandardField.DATE).get().substring(0, 4)) >= transformer.getStartYear().get()) + .collect(Collectors.toList()); } @Override diff --git a/src/main/java/org/jabref/logic/importer/fetcher/AstrophysicsDataSystem.java b/src/main/java/org/jabref/logic/importer/fetcher/AstrophysicsDataSystem.java index 
a266418b351..6da8f0920d3 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/AstrophysicsDataSystem.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/AstrophysicsDataSystem.java @@ -27,6 +27,7 @@ import org.jabref.logic.importer.PagedSearchBasedParserFetcher; import org.jabref.logic.importer.ParseException; import org.jabref.logic.importer.Parser; +import org.jabref.logic.importer.fetcher.transformators.DefaultQueryTransformer; import org.jabref.logic.importer.fileformat.BibtexParser; import org.jabref.logic.net.URLDownload; import org.jabref.logic.util.BuildInfo; @@ -41,6 +42,7 @@ import kong.unirest.json.JSONException; import kong.unirest.json.JSONObject; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; /** * Fetches data from the SAO/NASA Astrophysics Data System (https://ui.adsabs.harvard.edu/) @@ -79,13 +81,13 @@ public String getName() { } /** - * @param query query string, matching the apache solr format + * @param luceneQuery query string, matching the apache solr format * @return URL which points to a search request for given query */ @Override - public URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException { + public URL getURLForQuery(QueryNode luceneQuery, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder builder = new URIBuilder(API_SEARCH_URL); - builder.addParameter("q", query); + builder.addParameter("q", new DefaultQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); builder.addParameter("fl", "bibcode"); builder.addParameter("rows", String.valueOf(getPageSize())); builder.addParameter("start", String.valueOf(getPageSize() * pageNumber)); @@ -274,12 +276,12 @@ private List performSearchByIds(Collection identifiers) throws } @Override - public Page performSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException { + public Page 
performSearchPaged(QueryNode luceneQuery, int pageNumber) throws FetcherException { try { // This is currently just interpreting the complex query as a default string query - List bibcodes = fetchBibcodes(getComplexQueryURL(complexSearchQuery, pageNumber)); + List bibcodes = fetchBibcodes(getURLForQuery(luceneQuery, pageNumber)); Collection results = performSearchByIds(bibcodes); - return new Page<>(complexSearchQuery.toString(), pageNumber, results); + return new Page<>(luceneQuery.toString(), pageNumber, results); } catch (URISyntaxException e) { throw new FetcherException("Search URI is malformed", e); } catch (IOException e) { diff --git a/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java b/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java index 1b51cf04b00..c6e4c2b6ba5 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/CiteSeer.java @@ -20,6 +20,7 @@ import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; +import org.jabref.logic.importer.fetcher.transformators.DefaultQueryTransformer; import org.jabref.logic.importer.fileformat.CoinsParser; import org.jabref.logic.util.OS; import org.jabref.model.entry.BibEntry; @@ -27,6 +28,7 @@ import org.jabref.model.entry.field.StandardField; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; public class CiteSeer implements SearchBasedParserFetcher { @@ -44,10 +46,10 @@ public Optional getHelpPage() { } @Override - public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { + public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder("https://citeseer.ist.psu.edu/search"); uriBuilder.addParameter("sort", "rlv"); // 
Sort by relevance - uriBuilder.addParameter("q", query); // Query + uriBuilder.addParameter("q", new DefaultQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); // Query uriBuilder.addParameter("t", "doc"); // Type: documents // uriBuilder.addParameter("start", "0"); // Start index (not supported at the moment) return uriBuilder.build().toURL(); diff --git a/src/main/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcher.java index 3bcbd26a2fb..b6556e0930f 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcher.java @@ -14,6 +14,7 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; +import org.jabref.logic.importer.fetcher.transformators.DefaultQueryTransformer; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.Field; import org.jabref.model.entry.field.FieldFactory; @@ -21,6 +22,7 @@ import org.jabref.model.entry.field.UnknownField; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; public class CollectionOfComputerScienceBibliographiesFetcher implements SearchBasedParserFetcher { @@ -33,9 +35,9 @@ public CollectionOfComputerScienceBibliographiesFetcher(ImportFormatPreferences } @Override - public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { + public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { return new URIBuilder(BASIC_SEARCH_URL) - .addParameter("query", query) + .addParameter("query", new 
DefaultQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")) .addParameter("sort", "score") .build() .toURL(); diff --git a/src/main/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcher.java index 3ce32ebeee6..c616b018cb1 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcher.java @@ -13,6 +13,7 @@ import org.jabref.model.database.BibDatabaseMode; import org.jabref.model.entry.BibEntry; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,13 +37,23 @@ public CompositeSearchBasedFetcher(Set searchBasedFetchers, } @Override - public List performSearch(ComplexSearchQuery complexSearchQuery) { + public String getName() { + return "SearchAll"; + } + + @Override + public Optional getHelpPage() { + return Optional.empty(); + } + + @Override + public List performSearch(QueryNode luceneQuery) throws FetcherException { ImportCleanup cleanup = new ImportCleanup(BibDatabaseMode.BIBTEX); // All entries have to be converted into one format, this is necessary for the format conversion return fetchers.parallelStream() .flatMap(searchBasedFetcher -> { try { - return searchBasedFetcher.performSearch(complexSearchQuery).stream(); + return searchBasedFetcher.performSearch(luceneQuery).stream(); } catch (FetcherException e) { LOGGER.warn(String.format("%s API request failed", searchBasedFetcher.getName()), e); return Stream.empty(); @@ -52,14 +63,4 @@ public List performSearch(ComplexSearchQuery complexSearchQuery) { .map(cleanup::doPostCleanup) .collect(Collectors.toList()); } - - @Override - public String getName() { - return "SearchAll"; - } - - @Override - public Optional getHelpPage() { - return Optional.empty(); - } } diff --git 
a/src/main/java/org/jabref/logic/importer/fetcher/CrossRef.java b/src/main/java/org/jabref/logic/importer/fetcher/CrossRef.java index fc7603ecd5f..b644c56659d 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/CrossRef.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/CrossRef.java @@ -18,6 +18,7 @@ import org.jabref.logic.importer.ParseException; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; +import org.jabref.logic.importer.fetcher.transformators.DefaultQueryTransformer; import org.jabref.logic.importer.util.JsonReader; import org.jabref.logic.util.strings.StringSimilarity; import org.jabref.model.entry.AuthorList; @@ -32,6 +33,7 @@ import kong.unirest.json.JSONException; import kong.unirest.json.JSONObject; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; /** * A class for fetching DOIs from CrossRef @@ -63,9 +65,9 @@ public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedUR } @Override - public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { + public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder(API_URL); - uriBuilder.addParameter("query", query); + uriBuilder.addParameter("query", new DefaultQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); return uriBuilder.build().toURL(); } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java index cb223ce061d..2d12d9820f5 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/DBLPFetcher.java @@ -16,6 +16,7 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.Parser; import 
org.jabref.logic.importer.SearchBasedParserFetcher; +import org.jabref.logic.importer.fetcher.transformators.DBLPQueryTransformer; import org.jabref.logic.importer.fileformat.BibtexParser; import org.jabref.logic.layout.LayoutFormatterBasedFormatter; import org.jabref.logic.layout.format.RemoveLatexCommandsFormatter; @@ -24,6 +25,7 @@ import org.jabref.model.util.DummyFileUpdateMonitor; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; /** * Fetches BibTeX data from DBLP (dblp.org) @@ -42,9 +44,9 @@ public DBLPFetcher(ImportFormatPreferences importFormatPreferences) { } @Override - public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { + public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder(BASIC_SEARCH_URL); - uriBuilder.addParameter("q", query); + uriBuilder.addParameter("q", new DBLPQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); uriBuilder.addParameter("h", String.valueOf(100)); // number of hits uriBuilder.addParameter("c", String.valueOf(0)); // no need for auto-completion uriBuilder.addParameter("f", String.valueOf(0)); // "from", index of first hit to download diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DOAJFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/DOAJFetcher.java index 39146f3f120..5f62756cdf7 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/DOAJFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/DOAJFetcher.java @@ -16,6 +16,7 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; +import org.jabref.logic.importer.fetcher.transformators.DefaultQueryTransformer; import org.jabref.logic.util.OS; import org.jabref.model.entry.BibEntry; import 
org.jabref.model.entry.field.Field; @@ -26,6 +27,7 @@ import kong.unirest.json.JSONArray; import kong.unirest.json.JSONObject; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -188,9 +190,9 @@ public Optional getHelpPage() { } @Override - public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { + public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder(SEARCH_URL); - DOAJFetcher.addPath(uriBuilder, query); + DOAJFetcher.addPath(uriBuilder, new DefaultQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); // Number of results uriBuilder.addParameter("pageSize", "30"); // Page (not needed so far) diff --git a/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java b/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java index 58d6c8546b7..ffd4f9a55e0 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java @@ -19,6 +19,7 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.PagedSearchBasedFetcher; import org.jabref.logic.importer.ParserResult; +import org.jabref.logic.importer.fetcher.transformators.ScholarQueryTransformer; import org.jabref.logic.importer.fileformat.BibtexParser; import org.jabref.logic.l10n.Localization; import org.jabref.logic.net.URLDownload; @@ -28,6 +29,7 @@ import org.jabref.model.util.DummyFileUpdateMonitor; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; @@ -127,16 +129,6 @@ public Optional getHelpPage() { return 
Optional.of(HelpFile.FETCHER_GOOGLE_SCHOLAR); } - private String constructComplexQueryString(ComplexSearchQuery complexSearchQuery) { - List searchTerms = new ArrayList<>(); - searchTerms.addAll(complexSearchQuery.getDefaultFieldPhrases()); - complexSearchQuery.getAuthors().forEach(author -> searchTerms.add("author:" + author)); - searchTerms.add("allintitle:" + String.join(" ", complexSearchQuery.getTitlePhrases())); - complexSearchQuery.getJournal().ifPresent(journal -> searchTerms.add("source:" + journal)); - // API automatically ANDs the terms - return String.join(" ", searchTerms); - } - private void addHitsFromQuery(List entryList, String queryURL) throws IOException, FetcherException { String content = new URLDownload(queryURL).asString(); @@ -185,24 +177,20 @@ private void obtainAndModifyCookie() throws FetcherException { } @Override - public Page performSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException { + public Page performSearchPaged(QueryNode luceneQuery, int pageNumber) throws FetcherException { + ScholarQueryTransformer queryTransformer = new ScholarQueryTransformer(); + String transformedQuery = queryTransformer.transformLuceneQuery(luceneQuery).orElse(""); try { obtainAndModifyCookie(); List foundEntries = new ArrayList<>(10); - - String complexQueryString = constructComplexQueryString(complexSearchQuery); URIBuilder uriBuilder = new URIBuilder(BASIC_SEARCH_URL); uriBuilder.addParameter("hl", "en"); uriBuilder.addParameter("btnG", "Search"); - uriBuilder.addParameter("q", complexQueryString); + uriBuilder.addParameter("q", transformedQuery); uriBuilder.addParameter("start", String.valueOf(pageNumber * getPageSize())); uriBuilder.addParameter("num", String.valueOf(getPageSize())); - complexSearchQuery.getFromYear().ifPresent(year -> uriBuilder.addParameter("as_ylo", year.toString())); - complexSearchQuery.getToYear().ifPresent(year -> uriBuilder.addParameter("as_yhi", year.toString())); - 
complexSearchQuery.getSingleYear().ifPresent(year -> { - uriBuilder.addParameter("as_ylo", year.toString()); - uriBuilder.addParameter("as_yhi", year.toString()); - }); + uriBuilder.addParameter("as_ylo", String.valueOf(queryTransformer.getStartYear())); + uriBuilder.addParameter("as_yhi", String.valueOf(queryTransformer.getEndYear())); try { addHitsFromQuery(foundEntries, uriBuilder.toString()); @@ -223,7 +211,7 @@ public Page performSearchPaged(ComplexSearchQuery complexSearchQuery, throw new FetcherException("Error while fetching from " + getName(), e); } } - return new Page<>(complexQueryString, pageNumber, foundEntries); + return new Page<>(transformedQuery, pageNumber, foundEntries); } catch (URISyntaxException e) { throw new FetcherException("Error while fetching from " + getName(), e); } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcher.java index 61218575668..bf16d71570d 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcher.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.net.SocketTimeoutException; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; @@ -16,6 +17,7 @@ import org.jabref.model.entry.BibEntry; import org.jabref.model.util.DummyFileUpdateMonitor; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,7 +44,7 @@ public GrobidCitationFetcher(ImportFormatPreferences importFormatPreferences) { * * @return A BibTeX string if extraction is successful */ - private Optional parseUsingGrobid(String plainText) { + private Optional parseUsingGrobid(String plainText) throws RuntimeException { try { return Optional.of(grobidService.processCitation(plainText, 
GrobidService.ConsolidateCitations.WITH_METADATA)); } catch (SocketTimeoutException e) { @@ -52,7 +54,7 @@ private Optional parseUsingGrobid(String plainText) { } catch (IOException e) { String msg = "Could not process citation. " + e.getMessage(); LOGGER.debug(msg, e); - throw new RuntimeException(msg, e); + return Optional.empty(); } } @@ -66,30 +68,33 @@ private Optional parseBibToBibEntry(String bibtexString) { } @Override - public List performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException { - List bibEntries = null; - // This just treats the complex query like a normal string query until it it implemented correctly - String query = complexSearchQuery.toString(); + public String getName() { + return "GROBID"; + } + + @Override + public List performSearch(String searchQuery) throws FetcherException { + List collect; try { - bibEntries = Arrays - .stream(query.split("\\r\\r+|\\n\\n+|\\r\\n(\\r\\n)+")) - .map(String::trim) - .filter(str -> !str.isBlank()) - .map(this::parseUsingGrobid) - .flatMap(Optional::stream) - .map(this::parseBibToBibEntry) - .flatMap(Optional::stream) - .collect(Collectors.toList()); + collect = Arrays.stream(searchQuery.split("\\r\\r+|\\n\\n+|\\r\\n(\\r\\n)+")) + .map(String::trim) + .filter(str -> !str.isBlank()) + .map(this::parseUsingGrobid) + .flatMap(Optional::stream) + .map(this::parseBibToBibEntry) + .flatMap(Optional::stream) + .collect(Collectors.toList()); } catch (RuntimeException e) { - // un-wrap the wrapped exceptions throw new FetcherException(e.getMessage(), e.getCause()); } - return bibEntries; + return collect; } + /** + * Not used + */ @Override - public String getName() { - return "GROBID"; + public List performSearch(QueryNode luceneQuery) throws FetcherException { + return Collections.emptyList(); } - } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/GvkFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/GvkFetcher.java index 20b4fe34630..db6afbc6606 100644 --- 
a/src/main/java/org/jabref/logic/importer/fetcher/GvkFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/GvkFetcher.java @@ -5,19 +5,17 @@ import java.net.URL; import java.util.Arrays; import java.util.Collection; -import java.util.LinkedList; -import java.util.List; -import java.util.Objects; import java.util.Optional; -import java.util.stream.Collectors; import org.jabref.logic.help.HelpFile; import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; +import org.jabref.logic.importer.fetcher.transformators.GVKQueryTransformer; import org.jabref.logic.importer.fileformat.GvkParser; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; public class GvkFetcher implements SearchBasedParserFetcher { @@ -39,43 +37,12 @@ public Optional getHelpPage() { return Optional.of(HelpFile.FETCHER_GVK); } - private String getSearchQueryStringForComplexQuery(List queryList) { - String query = ""; - boolean lastWasNoKey = false; - - for (String key : queryList) { - if (searchKeys.contains(key)) { - if (lastWasNoKey) { - query = query + "and "; - } - query = query + "pica." 
+ key + "="; - } else { - query = query + key + " "; - lastWasNoKey = true; - } - } - return query.trim(); - } - - protected String getSearchQueryString(String query) { - Objects.requireNonNull(query); - LinkedList queryList = new LinkedList<>(Arrays.asList(query.split("\\s"))); - - if (searchKeys.contains(queryList.get(0))) { - return getSearchQueryStringForComplexQuery(queryList); - } else { - // query as pica.all - return queryList.stream().collect(Collectors.joining(" ", "pica.all=", "")); - } - } - @Override - public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { - String gvkQuery = getSearchQueryString(query); + public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder(URL_PATTERN); uriBuilder.addParameter("version", "1.1"); uriBuilder.addParameter("operation", "searchRetrieve"); - uriBuilder.addParameter("query", gvkQuery); + uriBuilder.addParameter("query", new GVKQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); uriBuilder.addParameter("maximumRecords", "50"); uriBuilder.addParameter("recordSchema", "picaxml"); uriBuilder.addParameter("sortKeys", "Year,,1"); diff --git a/src/main/java/org/jabref/logic/importer/fetcher/IEEE.java b/src/main/java/org/jabref/logic/importer/fetcher/IEEE.java index d89223908a0..9cd5341d357 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/IEEE.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/IEEE.java @@ -15,10 +15,12 @@ import java.util.stream.Collectors; import org.jabref.logic.help.HelpFile; +import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.FulltextFetcher; import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.PagedSearchBasedParserFetcher; import org.jabref.logic.importer.Parser; +import org.jabref.logic.importer.fetcher.transformators.IEEEQueryTransformer; import 
org.jabref.logic.net.URLDownload; import org.jabref.logic.util.BuildInfo; import org.jabref.logic.util.OS; @@ -31,6 +33,7 @@ import kong.unirest.json.JSONArray; import kong.unirest.json.JSONObject; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -222,48 +225,33 @@ public Optional getHelpPage() { } @Override - public URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException { - URIBuilder uriBuilder = new URIBuilder("https://ieeexploreapi.ieee.org/api/v1/search/articles"); - uriBuilder.addParameter("apikey", API_KEY); - uriBuilder.addParameter("querytext", query); - uriBuilder.addParameter("max_records", String.valueOf(getPageSize())); - // Starts to index at 1 for the first entry - uriBuilder.addParameter("start_record", String.valueOf(getPageSize() * pageNumber) + 1); - - URLDownload.bypassSSLVerification(); - - return uriBuilder.build().toURL(); - } - - @Override - public URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery, int pageNumber) throws URISyntaxException, MalformedURLException { + public URL getURLForQuery(QueryNode luceneQuery, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException { + IEEEQueryTransformer transformer = new IEEEQueryTransformer(); + String transformedQuery = transformer.transformLuceneQuery(luceneQuery).orElse(""); URIBuilder uriBuilder = new URIBuilder("https://ieeexploreapi.ieee.org/api/v1/search/articles"); uriBuilder.addParameter("apikey", API_KEY); + if (!transformedQuery.isBlank()) { + uriBuilder.addParameter("querytext", transformedQuery); + } uriBuilder.addParameter("max_records", String.valueOf(getPageSize())); - // Starts to index at 1 for the first entry - uriBuilder.addParameter("start_record", String.valueOf(getPageSize() * pageNumber) + 1); - - if (!complexSearchQuery.getDefaultFieldPhrases().isEmpty()) { - 
uriBuilder.addParameter("querytext", String.join(" AND ", complexSearchQuery.getDefaultFieldPhrases())); + // Currently not working as part of the query string + if (transformer.getJournal().isPresent()) { + uriBuilder.addParameter("publication_title", transformer.getJournal().get()); } - if (!complexSearchQuery.getAuthors().isEmpty()) { - uriBuilder.addParameter("author", String.join(" AND ", complexSearchQuery.getAuthors())); + if (transformer.getStartYear().isPresent()) { + uriBuilder.addParameter("start_year", String.valueOf(transformer.getStartYear().get())); } - if (!complexSearchQuery.getAbstractPhrases().isEmpty()) { - uriBuilder.addParameter("abstract", String.join(" AND ", complexSearchQuery.getAbstractPhrases())); + if (transformer.getEndYear().isPresent()) { + uriBuilder.addParameter("end_year", String.valueOf(transformer.getEndYear().get())); } - if (!complexSearchQuery.getTitlePhrases().isEmpty()) { - uriBuilder.addParameter("article_title", String.join(" AND ", complexSearchQuery.getTitlePhrases())); + if (transformer.getArticleNumber().isPresent()) { + uriBuilder.addParameter("article_number", transformer.getArticleNumber().get()); } - complexSearchQuery.getJournal().ifPresent(journalTitle -> uriBuilder.addParameter("publication_title", journalTitle)); - complexSearchQuery.getFromYear().map(String::valueOf).ifPresent(year -> uriBuilder.addParameter("start_year", year)); - complexSearchQuery.getToYear().map(String::valueOf).ifPresent(year -> uriBuilder.addParameter("end_year", year)); - complexSearchQuery.getSingleYear().map(String::valueOf).ifPresent(year -> { - uriBuilder.addParameter("start_year", year); - uriBuilder.addParameter("end_year", year); - }); + // Starts to index at 1 for the first entry + uriBuilder.addParameter("start_record", String.valueOf(getPageSize() * pageNumber) + 1); URLDownload.bypassSSLVerification(); + return uriBuilder.build().toURL(); } } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java 
b/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java index d7239dee240..c594ebb5efc 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/INSPIREFetcher.java @@ -13,6 +13,7 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; +import org.jabref.logic.importer.fetcher.transformators.DefaultQueryTransformer; import org.jabref.logic.importer.fileformat.BibtexParser; import org.jabref.logic.importer.util.MediaTypes; import org.jabref.logic.layout.format.LatexToUnicodeFormatter; @@ -23,6 +24,7 @@ import org.jabref.model.util.DummyFileUpdateMonitor; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; /** * Fetches data from the INSPIRE database. @@ -48,9 +50,9 @@ public Optional getHelpPage() { } @Override - public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { + public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder(INSPIRE_HOST); - uriBuilder.addParameter("q", query); // Query + uriBuilder.addParameter("q", new DefaultQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); // Query return uriBuilder.build().toURL(); } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/JstorFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/JstorFetcher.java index 6315e708c52..4c2f49a16d5 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/JstorFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/JstorFetcher.java @@ -19,6 +19,7 @@ import org.jabref.logic.importer.ParseException; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; +import 
org.jabref.logic.importer.fetcher.transformators.DefaultQueryTransformer; import org.jabref.logic.importer.fileformat.BibtexParser; import org.jabref.logic.net.URLDownload; import org.jabref.model.entry.BibEntry; @@ -26,6 +27,7 @@ import org.jabref.model.util.DummyFileUpdateMonitor; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -47,44 +49,9 @@ public JstorFetcher(ImportFormatPreferences importFormatPreferences) { } @Override - public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException { + public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder(SEARCH_HOST); - uriBuilder.addParameter("Query", query); - return uriBuilder.build().toURL(); - } - - @Override - public URL getURLForQuery(ComplexSearchQuery query) throws URISyntaxException, MalformedURLException, FetcherException { - URIBuilder uriBuilder = new URIBuilder(SEARCH_HOST); - StringBuilder stringBuilder = new StringBuilder(); - if (!query.getDefaultFieldPhrases().isEmpty()) { - stringBuilder.append(query.getDefaultFieldPhrases()); - } - if (!query.getAuthors().isEmpty()) { - for (String author : query.getAuthors()) { - stringBuilder.append("au:").append(author); - } - } - if (!query.getTitlePhrases().isEmpty()) { - for (String title : query.getTitlePhrases()) { - stringBuilder.append("ti:").append(title); - } - } - if (query.getJournal().isPresent()) { - stringBuilder.append("pt:").append(query.getJournal().get()); - } - if (query.getSingleYear().isPresent()) { - uriBuilder.addParameter("sd", String.valueOf(query.getSingleYear().get())); - uriBuilder.addParameter("ed", String.valueOf(query.getSingleYear().get())); - } - if (query.getFromYear().isPresent()) { - uriBuilder.addParameter("sd", 
String.valueOf(query.getFromYear().get())); - } - if (query.getToYear().isPresent()) { - uriBuilder.addParameter("ed", String.valueOf(query.getToYear().get())); - } - - uriBuilder.addParameter("Query", stringBuilder.toString()); + uriBuilder.addParameter("Query", new DefaultQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); return uriBuilder.build().toURL(); } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/MathSciNet.java b/src/main/java/org/jabref/logic/importer/fetcher/MathSciNet.java index 3ff0bd243b8..bfc2ca3ab8c 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/MathSciNet.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/MathSciNet.java @@ -23,6 +23,7 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; +import org.jabref.logic.importer.fetcher.transformators.DefaultQueryTransformer; import org.jabref.logic.importer.fileformat.BibtexParser; import org.jabref.logic.util.OS; import org.jabref.model.entry.BibEntry; @@ -31,6 +32,7 @@ import org.jabref.model.util.DummyFileUpdateMonitor; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; /** * Fetches data from the MathSciNet (http://www.ams.org/mathscinet) @@ -72,10 +74,10 @@ public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedUR } @Override - public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { + public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder("https://mathscinet.ams.org/mathscinet/search/publications.html"); uriBuilder.addParameter("pg7", "ALLF"); // search all fields - uriBuilder.addParameter("s7", query); // query + uriBuilder.addParameter("s7", new 
DefaultQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); // query uriBuilder.addParameter("r", "1"); // start index uriBuilder.addParameter("extend", "1"); // should return up to 100 items (instead of default 10) uriBuilder.addParameter("fmt", "bibtex"); // BibTeX format diff --git a/src/main/java/org/jabref/logic/importer/fetcher/MedlineFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/MedlineFetcher.java index a4553bc876b..304b76da38a 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/MedlineFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/MedlineFetcher.java @@ -28,6 +28,7 @@ import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.ParserResult; import org.jabref.logic.importer.SearchBasedFetcher; +import org.jabref.logic.importer.fetcher.transformators.DefaultQueryTransformer; import org.jabref.logic.importer.fileformat.MedlineImporter; import org.jabref.logic.l10n.Localization; import org.jabref.model.entry.BibEntry; @@ -35,6 +36,7 @@ import org.jabref.model.entry.field.UnknownField; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -157,33 +159,6 @@ public void doPostCleanup(BibEntry entry) { new FieldFormatterCleanup(StandardField.AUTHOR, new NormalizeNamesFormatter()).cleanup(entry); } - @Override - public List performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException { - List entryList; - String query = complexSearchQuery.toString(); - - if (query.isBlank()) { - return Collections.emptyList(); - } else { - // searching for pubmed ids matching the query - List idList = getPubMedIdsFromQuery(query); - - if (idList.isEmpty()) { - LOGGER.info("No results found."); - return Collections.emptyList(); - } - if (numberOfResultsFound > NUMBER_TO_FETCH) { - LOGGER.info( - numberOfResultsFound + " results found. 
Only 50 relevant results will be fetched by default."); - } - - // pass the list of ids to fetchMedline to download them. like a id fetcher for mutliple ids - entryList = fetchMedline(idList); - - return entryList; - } - } - private URL createSearchUrl(String query) throws URISyntaxException, MalformedURLException { URIBuilder uriBuilder = new URIBuilder(SEARCH_URL); uriBuilder.addParameter("db", "pubmed"); @@ -221,4 +196,32 @@ private List fetchMedline(List ids) throws FetcherException { Localization.lang("Error while fetching from %0", "Medline"), e); } } + + @Override + public List performSearch(QueryNode luceneQuery) throws FetcherException { + List entryList; + DefaultQueryTransformer transformer = new DefaultQueryTransformer(); + Optional transformedQuery = transformer.transformLuceneQuery(luceneQuery); + + if (transformedQuery.isEmpty() || transformedQuery.get().isBlank()) { + return Collections.emptyList(); + } else { + // searching for pubmed ids matching the query + List idList = getPubMedIdsFromQuery(transformedQuery.get()); + + if (idList.isEmpty()) { + LOGGER.info("No results found."); + return Collections.emptyList(); + } + if (numberOfResultsFound > NUMBER_TO_FETCH) { + LOGGER.info( + numberOfResultsFound + " results found. Only 50 relevant results will be fetched by default."); + } + + // pass the list of ids to fetchMedline to download them. 
like a id fetcher for mutliple ids + entryList = fetchMedline(idList); + + return entryList; + } + } } diff --git a/src/main/java/org/jabref/logic/importer/fetcher/SpringerFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/SpringerFetcher.java index 1064a7f272e..5c3b34b6d01 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/SpringerFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/SpringerFetcher.java @@ -11,8 +11,10 @@ import java.util.stream.Collectors; import org.jabref.logic.help.HelpFile; +import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.PagedSearchBasedParserFetcher; import org.jabref.logic.importer.Parser; +import org.jabref.logic.importer.fetcher.transformators.SpringerQueryTransformer; import org.jabref.logic.util.BuildInfo; import org.jabref.logic.util.OS; import org.jabref.model.entry.BibEntry; @@ -25,6 +27,7 @@ import kong.unirest.json.JSONArray; import kong.unirest.json.JSONObject; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -158,20 +161,15 @@ public Optional getHelpPage() { } @Override - public URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException { + public URL getURLForQuery(QueryNode luceneQuery, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder(API_URL); - uriBuilder.addParameter("q", query); // Search query + uriBuilder.addParameter("q", new SpringerQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); // Search query uriBuilder.addParameter("api_key", API_KEY); // API key uriBuilder.addParameter("s", String.valueOf(getPageSize() * pageNumber + 1)); // Start entry, starts indexing at 1 uriBuilder.addParameter("p", String.valueOf(getPageSize())); // Page size return uriBuilder.build().toURL(); } - @Override - public 
URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery, int pageNumber) throws URISyntaxException, MalformedURLException { - return getURLForQuery(constructComplexQueryString(complexSearchQuery), pageNumber); - } - private String constructComplexQueryString(ComplexSearchQuery complexSearchQuery) { List searchTerms = new ArrayList<>(); complexSearchQuery.getAuthors().forEach(author -> searchTerms.add("name:" + author)); diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ZbMATH.java b/src/main/java/org/jabref/logic/importer/fetcher/ZbMATH.java index 31105ede0ab..69ea0c81b2a 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/ZbMATH.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/ZbMATH.java @@ -13,6 +13,7 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; +import org.jabref.logic.importer.fetcher.transformators.ZbMathQueryTransformer; import org.jabref.logic.importer.fileformat.BibtexParser; import org.jabref.logic.net.URLDownload; import org.jabref.model.entry.BibEntry; @@ -21,6 +22,7 @@ import org.jabref.model.util.DummyFileUpdateMonitor; import org.apache.http.client.utils.URIBuilder; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; /** * Fetches data from the Zentralblatt Math (https://www.zbmath.org/) @@ -50,9 +52,9 @@ public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedUR } */ @Override - public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { + public URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder("https://zbmath.org/bibtexoutput/"); - uriBuilder.addParameter("q", query); // search all fields + uriBuilder.addParameter("q", new ZbMathQueryTransformer().transformLuceneQuery(luceneQuery).orElse("")); // search all fields 
uriBuilder.addParameter("start", "0"); // start index uriBuilder.addParameter("count", "200"); // should return up to 200 items (instead of default 100) diff --git a/src/main/java/org/jabref/logic/importer/fetcher/transformators/AbstractQueryTransformer.java b/src/main/java/org/jabref/logic/importer/fetcher/transformators/AbstractQueryTransformer.java new file mode 100644 index 00000000000..32c7a6b8d18 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/transformators/AbstractQueryTransformer.java @@ -0,0 +1,199 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import java.util.Optional; +import java.util.stream.Collectors; + +import org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * In case the transformator contains state for a query transformation (such as the {@link IEEEQueryTransformer}), it has to be noted at the JavaDoc. + * Otherwise, a single instance QueryTransformer can be used. + */ +public abstract class AbstractQueryTransformer { + public static final String NO_EXPLICIT_FIELD = "default"; + private static final Logger LOGGER = LoggerFactory.getLogger(AbstractQueryTransformer.class); + + /** + * Transforms a and b and c to (a AND b AND c), where + * a, b, and c can be complex expressions. 
+ */ + private Optional transform(BooleanQueryNode query) { + String delimiter; + if (query instanceof OrQueryNode) { + delimiter = getLogicalOrOperator(); + } else { + // We define the logical AND operator as the default implementation + delimiter = getLogicalAndOperator(); + } + + String result = query.getChildren().stream() + .map(this::transform) + .flatMap(Optional::stream) + .collect(Collectors.joining(delimiter, "(", ")")); + if (result.equals("()")) { + return Optional.empty(); + } + return Optional.of(result); + } + + /** + * Returns the logical AND operator used by the library + * Note: whitespaces have to be included around the operator + * + * Example: " AND " + */ + protected abstract String getLogicalAndOperator(); + + /** + * Returns the logical OR operator used by the library + * Note: whitespaces have to be included around the operator + * + * Example: " OR " + */ + protected abstract String getLogicalOrOperator(); + + /** + * Returns the logical NOT operator used by the library + * + * Example: "!" + */ + protected abstract String getLogicalNotOperator(); + + private Optional transform(FieldQueryNode query) { + String term = query.getTextAsString(); + switch (query.getFieldAsString()) { + case "author" -> { + return Optional.of(handleAuthor(term)); + } + case "title" -> { + return Optional.of(handleTitle(term)); + } + case "journal" -> { + return Optional.of(handleJournal(term)); + } + case "year" -> { + String s = handleYear(term); + return s.isEmpty() ? Optional.empty() : Optional.of(s); + } + case "year-range" -> { + String s = handleYearRange(term); + return s.isEmpty() ? Optional.empty() : Optional.of(s); + } + case "doi" -> { + String s = handleDoi(term); + return s.isEmpty() ? 
Optional.empty() : Optional.of(s); + } + case NO_EXPLICIT_FIELD -> { + return Optional.of(handleUnFieldedTerm(term)); + } + default -> { + // Just add unknown fields as default + return handleOtherField(query.getFieldAsString(), term); + } + } + } + + protected String handleDoi(String term) { + return "doi:" + term; + } + + /** + * Handles the not modifier, all other cases are silently ignored + */ + private Optional transform(ModifierQueryNode query) { + ModifierQueryNode.Modifier modifier = query.getModifier(); + if (modifier == ModifierQueryNode.Modifier.MOD_NOT) { + return transform(query.getChild()).map(s -> getLogicalNotOperator() + s); + } else { + return transform(query.getChild()); + } + } + + /** + * Return a string representation of the author fielded term + */ + protected abstract String handleAuthor(String author); + + /** + * Return a string representation of the title fielded term + */ + protected abstract String handleTitle(String title); + + /** + * Return a string representation of the journal fielded term + */ + protected abstract String handleJournal(String journalTitle); + + /** + * Return a string representation of the year fielded term + */ + protected abstract String handleYear(String year); + + /** + * Return a string representation of the year-range fielded term + * Should follow the structure yyyy-yyyy + * + * Example: 2015-2021 + */ + protected abstract String handleYearRange(String yearRange); + + /** + * Return a string representation of the un-fielded (default fielded) term + */ + protected abstract String handleUnFieldedTerm(String term); + + /** + * Return a string representation of the provided field + * If it is not supported return an empty optional. 
+ */ + protected Optional handleOtherField(String fieldAsString, String term) { + return Optional.of(String.format("%s:\"%s\"", fieldAsString, term)); + } + + private Optional transform(QueryNode query) { + if (query instanceof BooleanQueryNode) { + return transform((BooleanQueryNode) query); + } else if (query instanceof FieldQueryNode) { + return transform((FieldQueryNode) query); + } else if (query instanceof GroupQueryNode) { + return transform(((GroupQueryNode) query).getChild()); + } else if (query instanceof ModifierQueryNode) { + return transform((ModifierQueryNode) query); + } else { + LOGGER.error("Unsupported case when transforming the query:\n {}", query); + return Optional.empty(); + } + } + + /** + * Parses the given query string into a complex query using lucene. + * Note: For unique fields, the alphabetically and numerically first instance in the query string is used in the complex query. + * + * @param luceneQuery The lucene query to transform + * @return A query string containing all fields that are contained in the original lucene query and + * that are expressible in the library specific query language, other information either is discarded or + * stored as part of the state of the transformer if it can be used e.g. as a URL parameter for the query. 
+ */ + public Optional transformLuceneQuery(QueryNode luceneQuery) { + Optional transformedQuery = transform(luceneQuery); + transformedQuery = transformedQuery.map(this::removeOuterBraces); + return transformedQuery; + } + + /** + * Removes the outer braces as they are unnecessary + */ + private String removeOuterBraces(String query) { + if (query.startsWith("(") && query.endsWith(")")) { + return query.substring(1, query.length() - 1); + } + return query; + } +} diff --git a/src/main/java/org/jabref/logic/importer/fetcher/transformators/ArXivQueryTransformer.java b/src/main/java/org/jabref/logic/importer/fetcher/transformators/ArXivQueryTransformer.java new file mode 100644 index 00000000000..dd94eab746b --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/transformators/ArXivQueryTransformer.java @@ -0,0 +1,77 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import java.util.Optional; + +public class ArXivQueryTransformer extends AbstractQueryTransformer { + // These can be used for filtering in post processing + private int startYear = Integer.MAX_VALUE; + private int endYear = Integer.MIN_VALUE; + + @Override + protected String getLogicalAndOperator() { + return " AND "; + } + + @Override + protected String getLogicalOrOperator() { + return " OR "; + } + + /** + * Check whether this works as a unary operator + * @return + */ + @Override + protected String getLogicalNotOperator() { + return " ANDNOT "; + } + + @Override + protected String handleAuthor(String author) { + return String.format("au:\"%s\"", author); + } + + @Override + protected String handleTitle(String title) { + return String.format("ti:\"%s\"", title); + } + + @Override + protected String handleJournal(String journalTitle) { + return String.format("jr:\"%s\"", journalTitle); + } + + /** + * Manual testing shows that this works if added as an unfielded term, might lead to false positives + */ + @Override + protected String handleYear(String year) { + startYear = 
Math.min(startYear, Integer.parseInt(year)); + endYear = Math.max(endYear, Integer.parseInt(year)); + return year; + } + + /** + * Currently not supported + */ + @Override + protected String handleYearRange(String yearRange) { + String[] split = yearRange.split("-"); + startYear = Math.min(startYear, Integer.parseInt(split[0])); + endYear = Math.max(endYear, Integer.parseInt(split[1])); + return ""; + } + + @Override + protected String handleUnFieldedTerm(String term) { + return String.format("all:\"%s\"", term); + } + + public Optional getStartYear() { + return startYear == Integer.MAX_VALUE ? Optional.empty() : Optional.of(startYear); + } + + public Optional getEndYear() { + return endYear == Integer.MIN_VALUE ? Optional.empty() : Optional.of(endYear); + } +} diff --git a/src/main/java/org/jabref/logic/importer/fetcher/transformators/DBLPQueryTransformer.java b/src/main/java/org/jabref/logic/importer/fetcher/transformators/DBLPQueryTransformer.java new file mode 100644 index 00000000000..df943509320 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/transformators/DBLPQueryTransformer.java @@ -0,0 +1,66 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import java.util.StringJoiner; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DBLPQueryTransformer extends AbstractQueryTransformer { + private static final Logger LOGGER = LoggerFactory.getLogger(DBLPQueryTransformer.class); + + @Override + protected String getLogicalAndOperator() { + return " "; + } + + @Override + protected String getLogicalOrOperator() { + return "|"; + } + + @Override + protected String getLogicalNotOperator() { + LOGGER.warn("DBLP does not support Boolean NOT operator."); + return ""; + } + + @Override + protected String handleAuthor(String author) { + // DBLP does not support explicit author field search + return String.format("\"%s\"", author); + } + + @Override + protected String handleTitle(String title) { + // DBLP does not 
support explicit title field search + return String.format("\"%s\"", title); + } + + @Override + protected String handleJournal(String journalTitle) { + // DBLP does not support explicit journal field search + return String.format("\"%s\"", journalTitle); + } + + @Override + protected String handleYear(String year) { + // DBLP does not support explicit year field search + return year; + } + + @Override + protected String handleYearRange(String yearRange) { + // DBLP does not support explicit year range search + String[] split = yearRange.split("-"); + StringJoiner resultBuilder = new StringJoiner(getLogicalOrOperator()); + for (int i = Integer.parseInt(split[0]); i <= Integer.parseInt(split[1]); i++) { + resultBuilder.add(String.valueOf(i)); + } + return resultBuilder.toString(); + } + + @Override + protected String handleUnFieldedTerm(String term) { + return String.format("\"%s\"", term); + } +} diff --git a/src/main/java/org/jabref/logic/importer/fetcher/transformators/DefaultQueryTransformer.java b/src/main/java/org/jabref/logic/importer/fetcher/transformators/DefaultQueryTransformer.java new file mode 100644 index 00000000000..b99e327269c --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/transformators/DefaultQueryTransformer.java @@ -0,0 +1,49 @@ +package org.jabref.logic.importer.fetcher.transformators; + +public class DefaultQueryTransformer extends AbstractQueryTransformer { + + @Override + protected String getLogicalAndOperator() { + return " "; + } + + @Override + protected String getLogicalOrOperator() { + return " "; + } + + @Override + protected String getLogicalNotOperator() { + return ""; + } + + @Override + protected String handleAuthor(String author) { + return author; + } + + @Override + protected String handleTitle(String title) { + return title; + } + + @Override + protected String handleJournal(String journalTitle) { + return journalTitle; + } + + @Override + protected String handleYear(String year) { + return year; + } + + 
@Override + protected String handleYearRange(String yearRange) { + return yearRange; + } + + @Override + protected String handleUnFieldedTerm(String term) { + return term; + } +} diff --git a/src/main/java/org/jabref/logic/importer/fetcher/transformators/GVKQueryTransformer.java b/src/main/java/org/jabref/logic/importer/fetcher/transformators/GVKQueryTransformer.java new file mode 100644 index 00000000000..402e68b09c0 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/transformators/GVKQueryTransformer.java @@ -0,0 +1,67 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import java.util.Optional; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class GVKQueryTransformer extends AbstractQueryTransformer { + private static final Logger LOGGER = LoggerFactory.getLogger(GVKQueryTransformer.class); + + @Override + protected String getLogicalAndOperator() { + return " and "; + } + + @Override + protected String getLogicalOrOperator() { + LOGGER.warn("GVK does not support Boolean OR operator"); + return ""; + } + + @Override + protected String getLogicalNotOperator() { + LOGGER.warn("GVK does not support Boolean NOT operator"); + return ""; + } + + @Override + protected String handleAuthor(String author) { + return String.format("pica.per=\"%s\"", author); + } + + @Override + protected String handleTitle(String title) { + return String.format("pica.tit=\"%s\"", title); + } + + @Override + protected String handleJournal(String journalTitle) { + // zti means "Zeitschrift", does not search for conferences (kon:) + return String.format("pica.zti=\"%s\"", journalTitle); + } + + @Override + protected String handleYear(String year) { + // ver means Veröffentlichungsangaben + return "pica.ver=" + year; + } + + @Override + protected String handleYearRange(String yearRange) { + // Returns empty string as otherwise leads to no results + return ""; + } + + @Override + protected String handleUnFieldedTerm(String term) { + // all 
does not search in full-text + // Other option is txt: but this does not search in meta data + return String.format("pica.all=\"%s\"", term); + } + + @Override + protected Optional handleOtherField(String fieldAsString, String term) { + return Optional.of("pica." + fieldAsString + "=\"" + term + "\""); + } +} diff --git a/src/main/java/org/jabref/logic/importer/fetcher/transformators/IEEEQueryTransformer.java b/src/main/java/org/jabref/logic/importer/fetcher/transformators/IEEEQueryTransformer.java new file mode 100644 index 00000000000..a6bb4cd49c4 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/transformators/IEEEQueryTransformer.java @@ -0,0 +1,96 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import java.util.Objects; +import java.util.Optional; + +/** + * Needs to be instantiated for each new query + */ +public class IEEEQueryTransformer extends AbstractQueryTransformer { + // These have to be integrated into the IEEE query URL as these are just supported as query parameters + // Journal is wrapped in quotes by the transformer + private String journal; + private String articleNumber; + private int startYear = Integer.MAX_VALUE; + private int endYear = Integer.MIN_VALUE; + + @Override + protected String getLogicalAndOperator() { + return " AND "; + } + + @Override + protected String getLogicalOrOperator() { + return " OR "; + } + + @Override + protected String getLogicalNotOperator() { + return "NOT "; + } + + @Override + protected String handleAuthor(String author) { + return String.format("author:\"%s\"", author); + } + + @Override + protected String handleTitle(String title) { + return String.format("article_title:\"%s\"", title); + } + + @Override + protected String handleJournal(String journalTitle) { + journal = String.format("\"%s\"", journalTitle); + return ""; + } + + @Override + protected String handleYear(String year) { + startYear = Math.min(startYear, Integer.parseInt(year)); + endYear = Math.max(endYear, 
Integer.parseInt(year)); + return ""; + } + + @Override + protected String handleYearRange(String yearRange) { + String[] split = yearRange.split("-"); + startYear = Math.min(startYear, Integer.parseInt(split[0])); + endYear = Math.max(endYear, Integer.parseInt(split[1])); + return ""; + } + + @Override + protected String handleUnFieldedTerm(String term) { + return String.format("\"%s\"", term); + } + + @Override + protected Optional handleOtherField(String fieldAsString, String term) { + return switch (fieldAsString) { + case "article_number" -> handleArticleNumber(term); + default -> super.handleOtherField(fieldAsString, term); + }; + } + + private Optional handleArticleNumber(String term) { + articleNumber = term; + return Optional.empty(); + } + + public Optional getStartYear() { + return startYear == Integer.MAX_VALUE ? Optional.empty() : Optional.of(startYear); + } + + public Optional getEndYear() { + return endYear == Integer.MIN_VALUE ? Optional.empty() : Optional.of(endYear); + } + + public Optional getJournal() { + return Objects.isNull(journal) ? Optional.empty() : Optional.of(journal); + } + + public Optional getArticleNumber() { + return Objects.isNull(articleNumber) ? 
Optional.empty() : Optional.of(articleNumber); + } +} diff --git a/src/main/java/org/jabref/logic/importer/fetcher/transformators/JstorQueryTransformer.java b/src/main/java/org/jabref/logic/importer/fetcher/transformators/JstorQueryTransformer.java new file mode 100644 index 00000000000..1c7be5db728 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/transformators/JstorQueryTransformer.java @@ -0,0 +1,49 @@ +package org.jabref.logic.importer.fetcher.transformators; + +public class JstorQueryTransformer extends AbstractQueryTransformer { + @Override + protected String getLogicalAndOperator() { + return " AND "; + } + + @Override + protected String getLogicalOrOperator() { + return " OR "; + } + + @Override + protected String getLogicalNotOperator() { + return "NOT "; + } + + @Override + protected String handleAuthor(String author) { + return String.format("au:\"%s\"", author); + } + + @Override + protected String handleTitle(String title) { + return String.format("ti:\"%s\"", title); + } + + @Override + protected String handleJournal(String journalTitle) { + return String.format("pt:\"%s\"", journalTitle); + } + + @Override + protected String handleYear(String year) { + return "sd:" + year + getLogicalAndOperator() + "ed: " + year; + } + + @Override + protected String handleYearRange(String yearRange) { + String[] split = yearRange.split("-"); + return "sd:" + split[0] + getLogicalAndOperator() + "ed:" + split[1]; + } + + @Override + protected String handleUnFieldedTerm(String term) { + return String.format("\"%s\"", term); + } +} diff --git a/src/main/java/org/jabref/logic/importer/fetcher/transformators/ScholarQueryTransformer.java b/src/main/java/org/jabref/logic/importer/fetcher/transformators/ScholarQueryTransformer.java new file mode 100644 index 00000000000..a83db9364b1 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/transformators/ScholarQueryTransformer.java @@ -0,0 +1,65 @@ +package 
org.jabref.logic.importer.fetcher.transformators; + +public class ScholarQueryTransformer extends AbstractQueryTransformer { + // These have to be integrated into the Google Scholar query URL as these are just supported as query parameters + private int startYear = Integer.MAX_VALUE; + private int endYear = Integer.MIN_VALUE; + + @Override + protected String getLogicalAndOperator() { + return " AND "; + } + + @Override + protected String getLogicalOrOperator() { + return " OR "; + } + + @Override + protected String getLogicalNotOperator() { + return "-"; + } + + @Override + protected String handleAuthor(String author) { + return String.format("author:\"%s\"", author); + } + + @Override + protected String handleTitle(String title) { + return String.format("allintitle:\"%s\"", title); + } + + @Override + protected String handleJournal(String journalTitle) { + return String.format("source:\"%s\"", journalTitle); + } + + @Override + protected String handleYear(String year) { + startYear = Math.min(startYear, Integer.parseInt(year)); + endYear = Math.max(endYear, Integer.parseInt(year)); + return ""; + } + + @Override + protected String handleYearRange(String yearRange) { + String[] split = yearRange.split("-"); + startYear = Math.min(startYear, Integer.parseInt(split[0])); + endYear = Math.max(endYear, Integer.parseInt(split[1])); + return ""; + } + + @Override + protected String handleUnFieldedTerm(String term) { + return String.format("\"%s\"", term); + } + + public int getStartYear() { + return startYear == Integer.MAX_VALUE ? Integer.MIN_VALUE : startYear; + } + + public int getEndYear() { + return endYear == Integer.MIN_VALUE ? 
Integer.MAX_VALUE : endYear; + } +} diff --git a/src/main/java/org/jabref/logic/importer/fetcher/transformators/SpringerQueryTransformer.java b/src/main/java/org/jabref/logic/importer/fetcher/transformators/SpringerQueryTransformer.java new file mode 100644 index 00000000000..3907caef708 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/transformators/SpringerQueryTransformer.java @@ -0,0 +1,62 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import java.util.StringJoiner; + +/** + * This class converts a query string written in lucene syntax into a complex query. + * + * For simplicity this is currently limited to fielded data and the boolean AND operator. + */ +public class SpringerQueryTransformer extends AbstractQueryTransformer { + + @Override + public String getLogicalAndOperator() { + return " AND "; + } + + @Override + public String getLogicalOrOperator() { + return " OR "; + } + + @Override + protected String getLogicalNotOperator() { + return "-"; + } + + @Override + protected String handleAuthor(String author) { + return String.format("name:\"%s\"", author); + } + + @Override + protected String handleTitle(String title) { + return String.format("title:\"%s\"", title); + } + + @Override + protected String handleJournal(String journalTitle) { + return String.format("journal:\"%s\"", journalTitle); + + } + + @Override + protected String handleYear(String year) { + return String.format("date:%s*", year); + } + + @Override + protected String handleYearRange(String yearRange) { + String[] split = yearRange.split("-"); + StringJoiner resultBuilder = new StringJoiner("*" + getLogicalOrOperator() + "date:", "(date:", "*)"); + for (int i = Integer.parseInt(split[0]); i <= Integer.parseInt(split[1]); i++) { + resultBuilder.add(String.valueOf(i)); + } + return resultBuilder.toString(); + } + + @Override + protected String handleUnFieldedTerm(String term) { + return "\"" + term + "\""; + } +} diff --git 
a/src/main/java/org/jabref/logic/importer/fetcher/transformators/ZbMathQueryTransformer.java b/src/main/java/org/jabref/logic/importer/fetcher/transformators/ZbMathQueryTransformer.java new file mode 100644 index 00000000000..b2a466c20a1 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fetcher/transformators/ZbMathQueryTransformer.java @@ -0,0 +1,49 @@ +package org.jabref.logic.importer.fetcher.transformators; + +public class ZbMathQueryTransformer extends AbstractQueryTransformer { + + @Override + protected String getLogicalAndOperator() { + return " & "; + } + + @Override + protected String getLogicalOrOperator() { + return " | "; + } + + @Override + protected String getLogicalNotOperator() { + return "!"; + } + + @Override + protected String handleAuthor(String author) { + return String.format("au:\"%s\"", author); + } + + @Override + protected String handleTitle(String title) { + return String.format("ti:\"%s\"", title); + } + + @Override + protected String handleJournal(String journalTitle) { + return String.format("so:\"%s\"", journalTitle); + } + + @Override + protected String handleYear(String year) { + return "py:" + year; + } + + @Override + protected String handleYearRange(String yearRange) { + return "py:" + yearRange; + } + + @Override + protected String handleUnFieldedTerm(String term) { + return String.format("any:\"%s\"", term); + } +} diff --git a/src/test/java/org/jabref/logic/importer/QueryParserTest.java b/src/test/java/org/jabref/logic/importer/QueryParserTest.java index 970788e86c3..acb8c41a40c 100644 --- a/src/test/java/org/jabref/logic/importer/QueryParserTest.java +++ b/src/test/java/org/jabref/logic/importer/QueryParserTest.java @@ -6,7 +6,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; -class QueryParserTest { +public class QueryParserTest { QueryParser parser = new QueryParser(); @Test diff --git a/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java 
b/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java index 9fdf8b15ee8..2009f12e798 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java @@ -229,7 +229,7 @@ public SearchBasedFetcher getFetcher() { @Override public List getTestAuthors() { - return List.of("\"Tobias Diez\""); + return List.of("Tobias Diez"); } @Disabled("Is not supported by the current API") @@ -247,7 +247,7 @@ public void supportsYearRangeSearch() throws Exception { @Override public String getTestJournal() { - return "\"Journal of Geometry and Physics (2013)\""; + return "Journal of Geometry and Physics (2013)"; } /** diff --git a/src/test/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcherTest.java index 577f4624196..539decbf37c 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/CollectionOfComputerScienceBibliographiesFetcherTest.java @@ -10,12 +10,15 @@ import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.importer.fetcher.transformators.AbstractQueryTransformer; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.field.UnknownField; import org.jabref.model.entry.types.StandardEntryType; import org.jabref.testutils.category.FetcherTest; +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.Answers; @@ -41,9 +44,9 @@ public void getNameReturnsCorrectName() { } @Test - public void 
getUrlForQueryReturnsCorrectUrl() throws MalformedURLException, URISyntaxException, FetcherException { + public void getUrlForQueryReturnsCorrectUrl() throws MalformedURLException, URISyntaxException, FetcherException, QueryNodeParseException { String query = "java jdk"; - URL url = fetcher.getURLForQuery(query); + URL url = fetcher.getURLForQuery(new StandardSyntaxParser().parse(query, AbstractQueryTransformer.NO_EXPLICIT_FIELD)); assertEquals("http://liinwww.ira.uka.de/bibliography/rss?query=java+jdk&sort=score", url.toString()); } @@ -63,7 +66,7 @@ public void performSearchReturnsMatchingMultipleEntries() throws FetcherExceptio .withField(StandardField.YEAR, "2017") .withField(StandardField.BOOKTITLE, "11th European Conference on Software Architecture, ECSA 2017, Companion Proceedings, Canterbury, United Kingdom, September 11-15, 2017") .withField(new UnknownField("bibsource"), "DBLP, http://dblp.uni-trier.de/https://doi.org/10.1145/3129790.3129810; DBLP, http://dblp.uni-trier.de/db/conf/ecsa/ecsa2017c.html#OlssonEW17") - .withField(new UnknownField("bibdate"), "2018-11-06"); + .withField(new UnknownField("bibdate"), "2020-10-25"); BibEntry secondBibEntry = new BibEntry(StandardEntryType.Article) .withCitationKey("oai:DiVA.org:lnu-68408") diff --git a/src/test/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcherTest.java index b639c35cf5b..1c329d419a8 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/CompositeSearchBasedFetcherTest.java @@ -114,8 +114,8 @@ static Stream performSearchParameters() { // list.add(new MedlineFetcher()); // Create different sized sets of fetchers to use in the composite fetcher. 
- // Selected 273 to have differencing sets - for (int i = 1; i < Math.pow(2, list.size()); i += 273) { + // Selected 1173 to have differencing sets + for (int i = 1; i < Math.pow(2, list.size()); i += 1173) { Set fetchers = new HashSet<>(); // Only shift i at maximum to its MSB to the right for (int j = 0; Math.pow(2, j) <= i; j++) { diff --git a/src/test/java/org/jabref/logic/importer/fetcher/DBLPFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/DBLPFetcherTest.java index 83414d8666d..001d5071fa5 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/DBLPFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/DBLPFetcherTest.java @@ -50,7 +50,8 @@ public void setUp() { @Test public void findSingleEntry() throws FetcherException { - String query = "Process Engine Benchmarking with Betsy in the Context of {ISO/IEC} Quality Standards"; + // In Lucene curly brackets are used for range queries, therefore they have to be escaped using "". See https://lucene.apache.org/core/5_4_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html + String query = "Process Engine Benchmarking with Betsy in the Context of \"{ISO/IEC}\" Quality Standards"; List result = dblpFetcher.performSearch(query); assertEquals(Collections.singletonList(entry), result); diff --git a/src/test/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcherTest.java index 329748dcd56..f970273c34b 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcherTest.java @@ -1,6 +1,7 @@ package org.jabref.logic.importer.fetcher; import java.io.IOException; +import java.net.SocketTimeoutException; import java.util.Collections; import java.util.List; import java.util.stream.Stream; @@ -110,7 +111,7 @@ public void grobidPerformSearchWithInvalidDataTest(String invalidInput) 
throws F @Test public void performSearchThrowsExceptionInCaseOfConnectionIssues() throws IOException { GrobidService grobidServiceMock = mock(GrobidService.class); - when(grobidServiceMock.processCitation(anyString(), any())).thenThrow(new IOException("Any IO Exception")); + when(grobidServiceMock.processCitation(anyString(), any())).thenThrow(new SocketTimeoutException("Timeout")); grobidCitationFetcher = new GrobidCitationFetcher(importFormatPreferences, grobidServiceMock); assertThrows(FetcherException.class, () -> { diff --git a/src/test/java/org/jabref/logic/importer/fetcher/GvkFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/GvkFetcherTest.java index d1af17c5be0..88cd4b91ef0 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/GvkFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/GvkFetcherTest.java @@ -1,18 +1,19 @@ package org.jabref.logic.importer.fetcher; -import java.net.MalformedURLException; -import java.net.URISyntaxException; import java.net.URL; import java.util.Collections; import java.util.List; import org.jabref.logic.importer.FetcherException; +import org.jabref.logic.importer.fetcher.transformators.AbstractQueryTransformer; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.field.UnknownField; import org.jabref.model.entry.types.StandardEntryType; import org.jabref.testutils.category.FetcherTest; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -64,31 +65,19 @@ public void testGetName() { } @Test - public void simpleSearchQueryStringCorrect() { + public void simpleSearchQueryURLCorrect() throws Exception { String query = "java jdk"; - String result = fetcher.getSearchQueryString(query); - assertEquals("pica.all=java jdk", result); + QueryNode 
luceneQuery = new StandardSyntaxParser().parse(query, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + URL url = fetcher.getURLForQuery(luceneQuery); + assertEquals("http://sru.gbv.de/gvk?version=1.1&operation=searchRetrieve&query=pica.all%3D%22java%22+and+pica.all%3D%22jdk%22&maximumRecords=50&recordSchema=picaxml&sortKeys=Year%2C%2C1", url.toString()); } @Test - public void simpleSearchQueryURLCorrect() throws MalformedURLException, URISyntaxException, FetcherException { - String query = "java jdk"; - URL url = fetcher.getURLForQuery(query); - assertEquals("http://sru.gbv.de/gvk?version=1.1&operation=searchRetrieve&query=pica.all%3Djava+jdk&maximumRecords=50&recordSchema=picaxml&sortKeys=Year%2C%2C1", url.toString()); - } - - @Test - public void complexSearchQueryStringCorrect() { - String query = "kon java tit jdk"; - String result = fetcher.getSearchQueryString(query); - assertEquals("pica.kon=java and pica.tit=jdk", result); - } - - @Test - public void complexSearchQueryURLCorrect() throws MalformedURLException, URISyntaxException, FetcherException { - String query = "kon java tit jdk"; - URL url = fetcher.getURLForQuery(query); - assertEquals("http://sru.gbv.de/gvk?version=1.1&operation=searchRetrieve&query=pica.kon%3Djava+and+pica.tit%3Djdk&maximumRecords=50&recordSchema=picaxml&sortKeys=Year%2C%2C1", url.toString()); + public void complexSearchQueryURLCorrect() throws Exception { + String query = "kon:java tit:jdk"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(query, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + URL url = fetcher.getURLForQuery(luceneQuery); + assertEquals("http://sru.gbv.de/gvk?version=1.1&operation=searchRetrieve&query=pica.kon%3D%22java%22+and+pica.tit%3D%22jdk%22&maximumRecords=50&recordSchema=picaxml&sortKeys=Year%2C%2C1", url.toString()); } @Test diff --git a/src/test/java/org/jabref/logic/importer/fetcher/IEEETest.java b/src/test/java/org/jabref/logic/importer/fetcher/IEEETest.java index a063519bb8d..93224f5e8b0 100644 
--- a/src/test/java/org/jabref/logic/importer/fetcher/IEEETest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/IEEETest.java @@ -85,20 +85,21 @@ void notFoundByDOI() throws Exception { @Test void searchResultHasNoKeywordTerms() throws Exception { BibEntry expected = new BibEntry(StandardEntryType.Article) - .withField(StandardField.AUTHOR, "Shatakshi Jha and Ikhlaq Hussain and Bhim Singh and Sukumar Mishra") - .withField(StandardField.DATE, "25 2 2019") - .withField(StandardField.YEAR, "2019") - .withField(StandardField.DOI, "10.1049/iet-rpg.2018.5648") - .withField(StandardField.FILE, ":https\\://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8636659:PDF") - .withField(StandardField.ISSUE, "3") - .withField(StandardField.ISSN, "1752-1424") - .withField(StandardField.JOURNALTITLE, "IET Renewable Power Generation") - .withField(StandardField.PAGES, "418--426") - .withField(StandardField.PUBLISHER, "IET") - .withField(StandardField.TITLE, "Optimal operation of PV-DG-battery based microgrid with power quality conditioner") - .withField(StandardField.VOLUME, "13"); - - List fetchedEntries = fetcher.performSearch("8636659"); // article number + .withField(StandardField.AUTHOR, "Shatakshi Sharma and Bhim Singh and Sukumar Mishra") + .withField(StandardField.DATE, "April 2020") + .withField(StandardField.YEAR, "2020") + .withField(StandardField.DOI, "10.1109/TII.2019.2935531") + .withField(StandardField.FILE, ":https\\://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8801912:PDF") + .withField(StandardField.ISSUE, "4") + .withField(StandardField.ISSN, "1941-0050") + .withField(StandardField.JOURNALTITLE, "IEEE Transactions on Industrial Informatics") + .withField(StandardField.PAGES, "2346--2356") + .withField(StandardField.PUBLISHER, "IEEE") + .withField(StandardField.TITLE, "Economic Operation and Quality Control in PV-BES-DG-Based Autonomous System") + .withField(StandardField.VOLUME, "16") + .withField(StandardField.KEYWORDS, "Batteries, Generators, 
Economics, Power quality, State of charge, Harmonic analysis, Control systems, Battery, diesel generator (DG), distributed generation, power quality, photovoltaic (PV), voltage source converter (VSC)"); + + List fetchedEntries = fetcher.performSearch("article_number:8801912"); // article number fetchedEntries.forEach(entry -> entry.clearField(StandardField.ABSTRACT)); // Remove abstract due to copyright); assertEquals(Collections.singletonList(expected), fetchedEntries); } diff --git a/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java index ff8c597705b..43d5377fa64 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java @@ -59,7 +59,7 @@ public void searchByIdentifierFindsEntry() throws Exception { .withField(StandardField.EPRINT, "hep-ph/9802379") .withField(StandardField.ARCHIVEPREFIX, "arXiv") .withField(new UnknownField("reportnumber"), "BUDKER-INP-1998-7, TTP-98-10"); - List fetchedEntries = fetcher.performSearch("hep-ph/9802379"); + List fetchedEntries = fetcher.performSearch("\"hep-ph/9802379\""); assertEquals(Collections.singletonList(article), fetchedEntries); } } diff --git a/src/test/java/org/jabref/logic/importer/fetcher/SearchBasedFetcherCapabilityTest.java b/src/test/java/org/jabref/logic/importer/fetcher/SearchBasedFetcherCapabilityTest.java index c31bd348b0c..a5bd223cdf2 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/SearchBasedFetcherCapabilityTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/SearchBasedFetcherCapabilityTest.java @@ -3,6 +3,7 @@ import java.util.Collections; import java.util.List; import java.util.Optional; +import java.util.StringJoiner; import java.util.stream.Collectors; import org.jabref.logic.importer.ImportCleanup; @@ -31,10 +32,10 @@ interface SearchBasedFetcherCapabilityTest { */ @Test default 
void supportsAuthorSearch() throws Exception { - ComplexSearchQuery.ComplexSearchQueryBuilder builder = ComplexSearchQuery.builder(); - getTestAuthors().forEach(builder::author); + StringJoiner queryBuilder = new StringJoiner("\" AND author:\"", "author:\"", "\""); + getTestAuthors().forEach(queryBuilder::add); - List result = getFetcher().performSearch(builder.build()); + List result = getFetcher().performSearch(queryBuilder.toString()); new ImportCleanup(BibDatabaseMode.BIBTEX).doPostCleanup(result); assertFalse(result.isEmpty()); @@ -51,12 +52,7 @@ default void supportsAuthorSearch() throws Exception { */ @Test default void supportsYearSearch() throws Exception { - ComplexSearchQuery complexSearchQuery = ComplexSearchQuery - .builder() - .singleYear(getTestYear()) - .build(); - - List result = getFetcher().performSearch(complexSearchQuery); + List result = getFetcher().performSearch("year:" + getTestYear()); new ImportCleanup(BibDatabaseMode.BIBTEX).doPostCleanup(result); List differentYearsInResult = result.stream() .map(bibEntry -> bibEntry.getField(StandardField.YEAR)) @@ -73,11 +69,9 @@ default void supportsYearSearch() throws Exception { */ @Test default void supportsYearRangeSearch() throws Exception { - ComplexSearchQuery.ComplexSearchQueryBuilder builder = ComplexSearchQuery.builder(); List yearsInYearRange = List.of("2018", "2019", "2020"); - builder.fromYearAndToYear(2018, 2020); - List result = getFetcher().performSearch(builder.build()); + List result = getFetcher().performSearch("year-range:2018-2020"); new ImportCleanup(BibDatabaseMode.BIBTEX).doPostCleanup(result); List differentYearsInResult = result.stream() .map(bibEntry -> bibEntry.getField(StandardField.YEAR)) @@ -94,9 +88,7 @@ default void supportsYearRangeSearch() throws Exception { */ @Test default void supportsJournalSearch() throws Exception { - ComplexSearchQuery.ComplexSearchQueryBuilder builder = ComplexSearchQuery.builder(); - builder.journal(getTestJournal()); - List result = 
getFetcher().performSearch(builder.build()); + List result = getFetcher().performSearch("journal:\"" + getTestJournal() + "\""); new ImportCleanup(BibDatabaseMode.BIBTEX).doPostCleanup(result); assertFalse(result.isEmpty()); diff --git a/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java index 74e595c777a..4774e1250e2 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/SpringerFetcherTest.java @@ -140,12 +140,12 @@ public SearchBasedFetcher getFetcher() { @Override public List getTestAuthors() { - return List.of("\"Steinmacher, Igor\"", "\"Gerosa, Marco\"", "\"Conte, Tayana U.\""); + return List.of("Steinmacher, Igor", "Gerosa, Marco", "Conte, Tayana U."); } @Override public String getTestJournal() { - return "\"Clinical Research in Cardiology\""; + return "Clinical Research in Cardiology"; } @Override diff --git a/src/test/java/org/jabref/logic/importer/fetcher/transformators/ArXivQueryTransformerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/transformators/ArXivQueryTransformerTest.java new file mode 100644 index 00000000000..ac5933ae7bb --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fetcher/transformators/ArXivQueryTransformerTest.java @@ -0,0 +1,60 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import java.util.Optional; + +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class ArXivQueryTransformerTest implements InfixTransformerTest { + + @Override + public AbstractQueryTransformer getTransformator() { + return new ArXivQueryTransformer(); + } + + @Override + public String getAuthorPrefix() { + return "au:"; + } + + @Override + public String getUnFieldedPrefix() { + 
return "all:"; + } + + @Override + public String getJournalPrefix() { + return "jr:"; + } + + @Override + public String getTitlePrefix() { + return "ti:"; + } + + @Override + public void convertYearField() throws Exception { + ArXivQueryTransformer transformer = ((ArXivQueryTransformer) getTransformator()); + String queryString = "2018"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional query = transformer.transformLuceneQuery(luceneQuery); + Optional expected = Optional.of(queryString); + assertEquals(expected, query); + assertEquals(2018, transformer.getStartYear()); + assertEquals(2018, transformer.getEndYear()); + } + + @Override + public void convertYearRangeField() throws Exception { + ArXivQueryTransformer transformer = ((ArXivQueryTransformer) getTransformator()); + + String queryString = "year-range:2018-2021"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + transformer.transformLuceneQuery(luceneQuery); + + assertEquals(2018, transformer.getStartYear()); + assertEquals(2021, transformer.getEndYear()); + } +} diff --git a/src/test/java/org/jabref/logic/importer/fetcher/transformators/DBLPQueryTransformerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/transformators/DBLPQueryTransformerTest.java new file mode 100644 index 00000000000..d531f19da9d --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fetcher/transformators/DBLPQueryTransformerTest.java @@ -0,0 +1,54 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import java.util.Optional; + +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class DBLPQueryTransformerTest implements InfixTransformerTest { + + @Override + public AbstractQueryTransformer 
getTransformator() { + return new DBLPQueryTransformer(); + } + + @Override + public String getAuthorPrefix() { + return ""; + } + + @Override + public String getUnFieldedPrefix() { + return ""; + } + + @Override + public String getJournalPrefix() { + return ""; + } + + @Override + public String getTitlePrefix() { + return ""; + } + + @Override + public void convertYearField() throws Exception { + String queryString = "year:2015"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = getTransformator().transformLuceneQuery(luceneQuery); + Optional expected = Optional.of("2015"); + assertEquals(expected, searchQuery); + } + + @Override + public void convertYearRangeField() throws Exception { + String queryString = "year-range:2012-2015"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = getTransformator().transformLuceneQuery(luceneQuery); + Optional expected = Optional.of("2012|2013|2014|2015"); + assertEquals(expected, searchQuery); + } +} diff --git a/src/test/java/org/jabref/logic/importer/fetcher/transformators/GVKQueryTransformerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/transformators/GVKQueryTransformerTest.java new file mode 100644 index 00000000000..9dab863dc10 --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fetcher/transformators/GVKQueryTransformerTest.java @@ -0,0 +1,54 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import java.util.Optional; + +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; +import org.junit.jupiter.api.Disabled; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class GVKQueryTransformerTest implements InfixTransformerTest { + + @Override + public AbstractQueryTransformer getTransformator() { 
+ return new GVKQueryTransformer(); + } + + @Override + public String getAuthorPrefix() { + return "pica.per="; + } + + @Override + public String getUnFieldedPrefix() { + return "pica.all="; + } + + @Override + public String getJournalPrefix() { + return "pica.zti="; + } + + @Override + public String getTitlePrefix() { + return "pica.tit="; + } + + @Override + public void convertYearField() throws Exception { + + String queryString = "year:2018"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional query = getTransformator().transformLuceneQuery(luceneQuery); + + Optional expected = Optional.of("ver:2018"); + assertEquals(expected, query); + } + + @Disabled("Not supported by GVK") + @Override + public void convertYearRangeField() throws Exception { + + } +} diff --git a/src/test/java/org/jabref/logic/importer/fetcher/transformators/IEEEQueryTransformerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/transformators/IEEEQueryTransformerTest.java new file mode 100644 index 00000000000..7d2042a34e3 --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fetcher/transformators/IEEEQueryTransformerTest.java @@ -0,0 +1,70 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class IEEEQueryTransformerTest implements InfixTransformerTest { + + @Override + public AbstractQueryTransformer getTransformator() { + return new IEEEQueryTransformer(); + } + + @Override + public String getAuthorPrefix() { + return "author:"; + } + + @Override + public String getUnFieldedPrefix() { + return ""; + } + + @Override + public String getJournalPrefix() { + return "publication_title:"; + } + + @Override + public String getTitlePrefix() { + return "article_title:"; + } + + 
@Override + public void convertJournalField() throws Exception { + IEEEQueryTransformer transformer = ((IEEEQueryTransformer) getTransformator()); + + String queryString = "journal:Nature"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + transformer.transformLuceneQuery(luceneQuery); + + assertEquals("\"Nature\"", transformer.getJournal().get()); + } + + @Override + public void convertYearField() throws Exception { + IEEEQueryTransformer transformer = ((IEEEQueryTransformer) getTransformator()); + + String queryString = "year:2021"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + transformer.transformLuceneQuery(luceneQuery); + + assertEquals(2021, transformer.getStartYear()); + assertEquals(2021, transformer.getEndYear()); + } + + @Override + public void convertYearRangeField() throws Exception { + + IEEEQueryTransformer transformer = ((IEEEQueryTransformer) getTransformator()); + + String queryString = "year-range:2018-2021"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + transformer.transformLuceneQuery(luceneQuery); + + assertEquals(2018, transformer.getStartYear()); + assertEquals(2021, transformer.getEndYear()); + } +} diff --git a/src/test/java/org/jabref/logic/importer/fetcher/transformators/InfixTransformerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/transformators/InfixTransformerTest.java new file mode 100644 index 00000000000..f986a04c7e0 --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fetcher/transformators/InfixTransformerTest.java @@ -0,0 +1,97 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import java.util.Optional; + +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; +import 
org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * Test Interface for all transformers that use infix notation for their logical binary operators + */ +public interface InfixTransformerTest { + + AbstractQueryTransformer getTransformator(); + + /* All prefixes have to include the used separator + * Example in the case of ':': "author:" + */ + String getAuthorPrefix(); + + String getUnFieldedPrefix(); + + String getJournalPrefix(); + + String getTitlePrefix(); + + @Test + default void convertAuthorField() throws Exception { + String queryString = "author:\"Igor Steinmacher\""; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = getTransformator().transformLuceneQuery(luceneQuery); + Optional expected = Optional.of(getAuthorPrefix() + "\"Igor Steinmacher\""); + assertEquals(expected, searchQuery); + } + + @Test + default void convertUnFieldedTerm() throws Exception { + String queryString = "\"default value\""; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = getTransformator().transformLuceneQuery(luceneQuery); + Optional expected = Optional.of(getUnFieldedPrefix() + queryString); + assertEquals(expected, searchQuery); + } + + @Test + default void convertExplicitUnFieldedTerm() throws Exception { + String queryString = "default:\"default value\""; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = getTransformator().transformLuceneQuery(luceneQuery); + Optional expected = Optional.of(getUnFieldedPrefix() + "\"default value\""); + assertEquals(expected, searchQuery); + } + + @Test + default void convertJournalField() throws Exception { + String queryString = "journal:Nature"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, 
AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = getTransformator().transformLuceneQuery(luceneQuery); + Optional expected = Optional.of(getJournalPrefix() + "\"Nature\""); + assertEquals(expected, searchQuery); + } + + @Test + void convertYearField() throws Exception; + + @Test + void convertYearRangeField() throws Exception; + + @Test + default void convertMultipleValuesWithTheSameField() throws Exception { + String queryString = "author:\"Igor Steinmacher\" author:\"Christoph Treude\""; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = getTransformator().transformLuceneQuery(luceneQuery); + Optional expected = Optional.of(getAuthorPrefix() + "\"Igor Steinmacher\"" + getTransformator().getLogicalAndOperator() + getAuthorPrefix() + "\"Christoph Treude\""); + assertEquals(expected, searchQuery); + } + + @Test + default void groupedOperations() throws Exception { + String queryString = "(author:\"Igor Steinmacher\" OR author:\"Christoph Treude\" AND author:\"Christoph Freunde\") AND title:test"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = getTransformator().transformLuceneQuery(luceneQuery); + Optional expected = Optional.of("(" + getAuthorPrefix() + "\"Igor Steinmacher\"" + getTransformator().getLogicalOrOperator() + "(" + getAuthorPrefix() + "\"Christoph Treude\"" + getTransformator().getLogicalAndOperator() + getAuthorPrefix() + "\"Christoph Freunde\"))" + getTransformator().getLogicalAndOperator() + getTitlePrefix() + "\"test\""); + assertEquals(expected, searchQuery); + } + + @Test + default void notOperator() throws Exception { + String queryString = "!(author:\"Igor Steinmacher\" OR author:\"Christoph Treude\")"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = 
getTransformator().transformLuceneQuery(luceneQuery); + Optional expected = Optional.of(getTransformator().getLogicalNotOperator() + "(" + getAuthorPrefix() + "\"Igor Steinmacher\"" + getTransformator().getLogicalOrOperator() + getAuthorPrefix() + "\"Christoph Treude\")"); + assertEquals(expected, searchQuery); + } +} diff --git a/src/test/java/org/jabref/logic/importer/fetcher/transformators/JstorQueryTransformerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/transformators/JstorQueryTransformerTest.java new file mode 100644 index 00000000000..e8a5a6014b9 --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fetcher/transformators/JstorQueryTransformerTest.java @@ -0,0 +1,39 @@ +package org.jabref.logic.importer.fetcher.transformators; + +class JstorQueryTransformerTest implements InfixTransformerTest { + + @Override + public AbstractQueryTransformer getTransformator() { + return new JstorQueryTransformer(); + } + + @Override + public String getAuthorPrefix() { + return "au:"; + } + + @Override + public String getUnFieldedPrefix() { + return ""; + } + + @Override + public String getJournalPrefix() { + return "pt:"; + } + + @Override + public String getTitlePrefix() { + return "ti:"; + } + + @Override + public void convertYearField() throws Exception { + + } + + @Override + public void convertYearRangeField() throws Exception { + + } +} diff --git a/src/test/java/org/jabref/logic/importer/fetcher/transformators/ScholarQueryTransformerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/transformators/ScholarQueryTransformerTest.java new file mode 100644 index 00000000000..e83329b893a --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fetcher/transformators/ScholarQueryTransformerTest.java @@ -0,0 +1,59 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; + +import 
static org.junit.jupiter.api.Assertions.assertEquals; + +class ScholarQueryTransformerTest implements InfixTransformerTest { + + @Override + public AbstractQueryTransformer getTransformator() { + return new ScholarQueryTransformer(); + } + + @Override + public String getAuthorPrefix() { + return "author:"; + } + + @Override + public String getUnFieldedPrefix() { + return ""; + } + + @Override + public String getJournalPrefix() { + return "source:"; + } + + @Override + public String getTitlePrefix() { + return "allintitle:"; + } + + @Override + public void convertYearField() throws Exception { + ScholarQueryTransformer transformer = ((ScholarQueryTransformer) getTransformator()); + + String queryString = "year:2021"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + transformer.transformLuceneQuery(luceneQuery); + + assertEquals(2021, transformer.getStartYear()); + assertEquals(2021, transformer.getEndYear()); + } + + @Override + public void convertYearRangeField() throws Exception { + + ScholarQueryTransformer transformer = ((ScholarQueryTransformer) getTransformator()); + + String queryString = "year-range:2018-2021"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + transformer.transformLuceneQuery(luceneQuery); + + assertEquals(2018, transformer.getStartYear()); + assertEquals(2021, transformer.getEndYear()); + } +} diff --git a/src/test/java/org/jabref/logic/importer/fetcher/transformators/SpringerQueryTransformerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/transformators/SpringerQueryTransformerTest.java new file mode 100644 index 00000000000..c0fbd484251 --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fetcher/transformators/SpringerQueryTransformerTest.java @@ -0,0 +1,56 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import java.util.Optional; + +import 
org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class SpringerQueryTransformerTest implements InfixTransformerTest { + + @Override + public String getAuthorPrefix() { + return "name:"; + } + + @Override + public AbstractQueryTransformer getTransformator() { + return new SpringerQueryTransformer(); + } + + @Override + public String getUnFieldedPrefix() { + return ""; + } + + @Override + public String getJournalPrefix() { + return "journal:"; + } + + @Override + public String getTitlePrefix() { + return "title:"; + } + + @Override + public void convertYearField() throws Exception { + String queryString = "year:2015"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = getTransformator().transformLuceneQuery(luceneQuery); + + Optional expected = Optional.of("date:2015*"); + assertEquals(expected, searchQuery); + } + + @Override + public void convertYearRangeField() throws Exception { + String queryString = "year-range:2012-2015"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = getTransformator().transformLuceneQuery(luceneQuery); + + Optional expected = Optional.of("date:2012* OR date:2013* OR date:2014* OR date:2015*"); + assertEquals(expected, searchQuery); + } +} diff --git a/src/test/java/org/jabref/logic/importer/fetcher/transformators/ZbMathQueryTransformerTest.java b/src/test/java/org/jabref/logic/importer/fetcher/transformators/ZbMathQueryTransformerTest.java new file mode 100644 index 00000000000..73737455534 --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fetcher/transformators/ZbMathQueryTransformerTest.java @@ -0,0 +1,54 @@ +package org.jabref.logic.importer.fetcher.transformators; + +import 
java.util.Optional; + +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class ZbMathQueryTransformerTest implements InfixTransformerTest { + + @Override + public AbstractQueryTransformer getTransformator() { + return new ZbMathQueryTransformer(); + } + + @Override + public String getAuthorPrefix() { + return "au:"; + } + + @Override + public String getUnFieldedPrefix() { + return "any:"; + } + + @Override + public String getJournalPrefix() { + return "so:"; + } + + @Override + public String getTitlePrefix() { + return "ti:"; + } + + @Override + public void convertYearField() throws Exception { + String queryString = "year:2015"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = getTransformator().transformLuceneQuery(luceneQuery); + Optional expected = Optional.of("py:2015"); + assertEquals(expected, searchQuery); + } + + @Override + public void convertYearRangeField() throws Exception { + String queryString = "year-range:2012-2015"; + QueryNode luceneQuery = new StandardSyntaxParser().parse(queryString, AbstractQueryTransformer.NO_EXPLICIT_FIELD); + Optional searchQuery = getTransformator().transformLuceneQuery(luceneQuery); + Optional expected = Optional.of("py:2012-2015"); + assertEquals(expected, searchQuery); + } +} From ec88998eb8456295d5360964876b192d01002ec3 Mon Sep 17 00:00:00 2001 From: Christoph Date: Fri, 29 Jan 2021 22:11:29 +0100 Subject: [PATCH 05/15] Fix File Filter and some layout issues (#7385) * Fix File Filter and some layout issues Fixes part of #7383 * better min width * Any file instead of all * remove style class * l10n Co-authored-by: Carl Christian Snethlage <50491877+calixtus@users.noreply.github.com> --- src/main/java/org/jabref/gui/Base.css | 2 +- 
.../externalfiles/FileExtensionViewModel.java | 2 +- .../externalfiles/UnlinkedFilesCrawler.java | 2 +- .../externalfiles/UnlinkedFilesDialog.fxml | 14 ++-- .../externalfiles/UnlinkedPDFFileFilter.java | 8 +- .../java/org/jabref/logic/util/FileType.java | 2 + .../jabref/logic/util/StandardFileType.java | 78 ++++++++++--------- .../jabref/logic/util/UnknownFileType.java | 5 ++ .../logic/util/io/DatabaseFileLookup.java | 6 +- src/main/resources/l10n/JabRef_en.properties | 2 +- 10 files changed, 70 insertions(+), 51 deletions(-) diff --git a/src/main/java/org/jabref/gui/Base.css b/src/main/java/org/jabref/gui/Base.css index bf6f8fb5612..ffe7cd3f04f 100644 --- a/src/main/java/org/jabref/gui/Base.css +++ b/src/main/java/org/jabref/gui/Base.css @@ -1217,6 +1217,6 @@ TextFlow * { } -.mainTable-header{ +.mainTable-header { -fx-fill: -fx-mid-text-color; } diff --git a/src/main/java/org/jabref/gui/externalfiles/FileExtensionViewModel.java b/src/main/java/org/jabref/gui/externalfiles/FileExtensionViewModel.java index f1f51862c30..121517bf5bc 100644 --- a/src/main/java/org/jabref/gui/externalfiles/FileExtensionViewModel.java +++ b/src/main/java/org/jabref/gui/externalfiles/FileExtensionViewModel.java @@ -19,7 +19,7 @@ public class FileExtensionViewModel { private final ExternalFileTypes externalFileTypes; FileExtensionViewModel(FileType fileType, ExternalFileTypes externalFileTypes) { - this.description = Localization.lang("%0 file", fileType.toString()); + this.description = Localization.lang("%0 file", fileType.getName()); this.extensions = fileType.getExtensionsWithDot(); this.externalFileTypes = externalFileTypes; } diff --git a/src/main/java/org/jabref/gui/externalfiles/UnlinkedFilesCrawler.java b/src/main/java/org/jabref/gui/externalfiles/UnlinkedFilesCrawler.java index 60d89246e1b..4b29a57204f 100644 --- a/src/main/java/org/jabref/gui/externalfiles/UnlinkedFilesCrawler.java +++ b/src/main/java/org/jabref/gui/externalfiles/UnlinkedFilesCrawler.java @@ -74,7 +74,7 @@ 
private FileNodeViewModel searchDirectory(Path directory, UnlinkedPDFFileFilter Map> fileListPartition; try (Stream filesStream = StreamSupport.stream(Files.newDirectoryStream(directory, fileFilter).spliterator(), false)) { - fileListPartition = filesStream.collect(Collectors.partitioningBy(path -> path.toFile().isDirectory())); + fileListPartition = filesStream.collect(Collectors.partitioningBy(Files::isDirectory)); } catch (IOException e) { LOGGER.error(String.format("%s while searching files: %s", e.getClass().getName(), e.getMessage())); return parent; diff --git a/src/main/java/org/jabref/gui/externalfiles/UnlinkedFilesDialog.fxml b/src/main/java/org/jabref/gui/externalfiles/UnlinkedFilesDialog.fxml index 0d3246da39d..bea10fcc9c3 100644 --- a/src/main/java/org/jabref/gui/externalfiles/UnlinkedFilesDialog.fxml +++ b/src/main/java/org/jabref/gui/externalfiles/UnlinkedFilesDialog.fxml @@ -34,14 +34,14 @@ - - + +