diff --git a/.gitignore b/.gitignore index c94289d0f06..c0e5370009f 100644 --- a/.gitignore +++ b/.gitignore @@ -581,3 +581,5 @@ CHANGELOG.html # some strange gradle/IntelliJ extension extension 'reporting' property 'baseDirectory' + +tessdata/ diff --git a/jabgui/src/main/java/org/jabref/gui/fieldeditors/LinkedFilesEditor.java b/jabgui/src/main/java/org/jabref/gui/fieldeditors/LinkedFilesEditor.java index ff6992be048..554c58cae8a 100644 --- a/jabgui/src/main/java/org/jabref/gui/fieldeditors/LinkedFilesEditor.java +++ b/jabgui/src/main/java/org/jabref/gui/fieldeditors/LinkedFilesEditor.java @@ -325,7 +325,8 @@ private void handleItemMouseClick(LinkedFileViewModel linkedFile, MouseEvent eve bibEntry, viewModel, contextCommandFactory, - multiContextCommandFactory + multiContextCommandFactory, + taskExecutor ); ContextMenu contextMenu = contextMenuFactory.createForSelection(listView.getSelectionModel().getSelectedItems()); diff --git a/jabgui/src/main/java/org/jabref/gui/fieldeditors/contextmenu/ContextMenuFactory.java b/jabgui/src/main/java/org/jabref/gui/fieldeditors/contextmenu/ContextMenuFactory.java index a6b3262450e..9c05875e180 100644 --- a/jabgui/src/main/java/org/jabref/gui/fieldeditors/contextmenu/ContextMenuFactory.java +++ b/jabgui/src/main/java/org/jabref/gui/fieldeditors/contextmenu/ContextMenuFactory.java @@ -2,6 +2,7 @@ import javafx.collections.ObservableList; import javafx.scene.control.ContextMenu; +import javafx.scene.control.MenuItem; import javafx.scene.control.SeparatorMenuItem; import org.jabref.gui.DialogService; @@ -10,7 +11,10 @@ import org.jabref.gui.copyfiles.CopySingleFileAction; import org.jabref.gui.fieldeditors.LinkedFileViewModel; import org.jabref.gui.fieldeditors.LinkedFilesEditorViewModel; +import org.jabref.gui.linkedfile.OcrAction; import org.jabref.gui.preferences.GuiPreferences; +import org.jabref.logic.l10n.Localization; +import org.jabref.logic.util.TaskExecutor; import org.jabref.model.database.BibDatabaseContext; import 
org.jabref.model.entry.BibEntry; @@ -25,6 +29,7 @@ public class ContextMenuFactory { private final LinkedFilesEditorViewModel viewModel; private final SingleContextCommandFactory singleCommandFactory; private final MultiContextCommandFactory multiCommandFactory; + private final TaskExecutor taskExecutor; public ContextMenuFactory(DialogService dialogService, GuiPreferences preferences, @@ -32,7 +37,8 @@ public ContextMenuFactory(DialogService dialogService, ObservableOptionalValue bibEntry, LinkedFilesEditorViewModel viewModel, SingleContextCommandFactory singleCommandFactory, - MultiContextCommandFactory multiCommandFactory) { + MultiContextCommandFactory multiCommandFactory, + TaskExecutor taskExecutor) { this.dialogService = dialogService; this.preferences = preferences; this.databaseContext = databaseContext; @@ -40,6 +46,7 @@ public ContextMenuFactory(DialogService dialogService, this.viewModel = viewModel; this.singleCommandFactory = singleCommandFactory; this.multiCommandFactory = multiCommandFactory; + this.taskExecutor = taskExecutor; } public ContextMenu createForSelection(ObservableList selectedFiles) { @@ -86,9 +93,45 @@ private ContextMenu createContextMenuForFile(LinkedFileViewModel linkedFile) { factory.createMenuItem(StandardActions.DELETE_FILE, singleCommandFactory.build(StandardActions.DELETE_FILE, linkedFile)) ); + // Add OCR menu item for PDF files + if (linkedFile.getFile().getFileType().equalsIgnoreCase("pdf")) { + menu.getItems().add(new SeparatorMenuItem()); + + MenuItem ocrItem = createOcrMenuItem(linkedFile); + menu.getItems().add(ocrItem); + } + return menu; } + /** + * Creates the OCR menu item for a PDF file. + * The menu item is only enabled if the PDF file exists on disk. 
+ * + * @param linkedFile The linked PDF file + * @return MenuItem configured for OCR action + */ + private MenuItem createOcrMenuItem(LinkedFileViewModel linkedFile) { + MenuItem ocrItem = new MenuItem(Localization.lang("Extract text (OCR)")); + + // Create the OCR action + OcrAction ocrAction = new OcrAction( + linkedFile.getFile(), + databaseContext, + dialogService, + preferences.getFilePreferences(), + taskExecutor + ); + + // Set the action to execute when clicked + ocrItem.setOnAction(event -> ocrAction.execute()); + + // Disable if the action is not executable (file doesn't exist) + ocrItem.disableProperty().bind(ocrAction.executableProperty().not()); + + return ocrItem; + } + @FunctionalInterface public interface SingleContextCommandFactory { ContextAction build(StandardActions action, LinkedFileViewModel file); diff --git a/jabgui/src/main/java/org/jabref/gui/linkedfile/OcrAction.java b/jabgui/src/main/java/org/jabref/gui/linkedfile/OcrAction.java new file mode 100644 index 00000000000..2d5f22773f4 --- /dev/null +++ b/jabgui/src/main/java/org/jabref/gui/linkedfile/OcrAction.java @@ -0,0 +1,106 @@ +package org.jabref.gui.linkedfile; + +import org.jabref.gui.DialogService; +import org.jabref.gui.StateManager; +import org.jabref.gui.actions.Action; +import org.jabref.gui.actions.ActionHelper; +import org.jabref.gui.actions.SimpleCommand; +import org.jabref.logic.util.BackgroundTask; +import org.jabref.logic.util.TaskExecutor; +import org.jabref.logic.l10n.Localization; +import org.jabref.logic.ocr.OcrService; +import org.jabref.logic.ocr.OcrException; +import org.jabref.model.database.BibDatabaseContext; +import org.jabref.model.entry.LinkedFile; +import org.jabref.logic.FilePreferences; + +import java.nio.file.Path; +import java.util.Optional; + +/** + * Action for performing OCR (Optical Character Recognition) on linked PDF files. + *

+ * This action extracts text content from PDF files that are attached to BibTeX entries. + * It runs the OCR process in a background thread to keep the UI responsive and provides + * user feedback through dialogs and notifications. + *

+ * The action follows JabRef's command pattern and can be triggered from context menus. + * It includes built-in validation to ensure it's only enabled for PDF files that exist on disk. + * + * @see OcrService + * @see org.jabref.gui.actions.SimpleCommand + */ + +// Personal Note: Add more doc in between later + +public class OcrAction extends SimpleCommand { + + private final LinkedFile linkedFile; + private final BibDatabaseContext databaseContext; + private final DialogService dialogService; + private final FilePreferences filePreferences; + private final TaskExecutor taskExecutor; + + public OcrAction(LinkedFile linkedFile, + BibDatabaseContext databaseContext, + DialogService dialogService, + FilePreferences filePreferences, + TaskExecutor taskExecutor) { + this.linkedFile = linkedFile; + this.databaseContext = databaseContext; + this.dialogService = dialogService; + this.filePreferences = filePreferences; + this.taskExecutor = taskExecutor; + + // Only executable for existing PDF files + this.executable.set( + linkedFile.getFileType().equalsIgnoreCase("pdf") && + linkedFile.findIn(databaseContext, filePreferences).isPresent() + ); + } + + @Override + public void execute() { + Optional filePath = linkedFile.findIn(databaseContext, filePreferences); + + if (filePath.isEmpty()) { + dialogService.showErrorDialogAndWait( + Localization.lang("File not found"), + Localization.lang("Could not locate the PDF file on disk.") + ); + return; + } + + dialogService.notify(Localization.lang("Performing OCR...")); + + BackgroundTask.wrap(() -> { + OcrService ocrService = new OcrService(); + return ocrService.performOcr(filePath.get()); + }) + .onSuccess(extractedText -> { + if (extractedText.isEmpty()) { + dialogService.showInformationDialogAndWait( + Localization.lang("OCR Complete"), + Localization.lang("No text was found in the PDF.") + ); + } else { + // For now, just show preview + String preview = extractedText.length() > 1000 + ? 
extractedText.substring(0, 1000) + "..." + : extractedText; + + dialogService.showInformationDialogAndWait( + Localization.lang("OCR Result"), + preview + ); + } + }) + .onFailure(exception -> { + dialogService.showErrorDialogAndWait( + Localization.lang("OCR failed"), + exception.getMessage() + ); + }) + .executeWith(taskExecutor); + } +} diff --git a/jabgui/src/test/java/org/jabref/gui/fieldeditors/contextmenu/ContextMenuFactoryTest.java b/jabgui/src/test/java/org/jabref/gui/fieldeditors/contextmenu/ContextMenuFactoryTest.java index 141bc6e44e5..ec777d2b379 100644 --- a/jabgui/src/test/java/org/jabref/gui/fieldeditors/contextmenu/ContextMenuFactoryTest.java +++ b/jabgui/src/test/java/org/jabref/gui/fieldeditors/contextmenu/ContextMenuFactoryTest.java @@ -11,6 +11,7 @@ import org.jabref.gui.fieldeditors.LinkedFileViewModel; import org.jabref.gui.fieldeditors.LinkedFilesEditorViewModel; import org.jabref.gui.preferences.GuiPreferences; +import org.jabref.logic.util.TaskExecutor; import org.jabref.model.database.BibDatabaseContext; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.LinkedFile; @@ -41,6 +42,7 @@ public class ContextMenuFactoryTest { private ContextMenuFactory factory; private ContextMenuFactory.SingleContextCommandFactory singleCommandFactory; private ContextMenuFactory.MultiContextCommandFactory multiCommandFactory; + private TaskExecutor taskExecutor; @BeforeAll public static void initToolkit() { @@ -78,7 +80,8 @@ public void setUp() { bibEntry, viewModel, singleCommandFactory, - multiCommandFactory + multiCommandFactory, + taskExecutor ); } diff --git a/jablib/build.gradle.kts b/jablib/build.gradle.kts index ece09d14ad5..0042678cc44 100644 --- a/jablib/build.gradle.kts +++ b/jablib/build.gradle.kts @@ -253,6 +253,9 @@ dependencies { // Required for LocalizationConsistencyTest testImplementation("org.testfx:testfx-core:4.0.16-alpha") testImplementation("org.testfx:testfx-junit5:4.0.16-alpha") + + // OCR support + 
/**
 * Checked exception signalling that an OCR operation could not be carried out.
 * <p>
 * It can carry the lower-level exception that triggered it, so callers deal with
 * one exception type regardless of the underlying OCR engine.
 */
public class OcrException extends Exception {

    /**
     * Creates an exception that carries only a description of the problem.
     *
     * @param message descriptive error message
     */
    public OcrException(String message) {
        super(message);
    }

    /**
     * Creates an exception that describes the problem and records what caused it.
     *
     * @param message descriptive error message
     * @param cause   the underlying exception that led to this error
     */
    public OcrException(String message, Throwable cause) {
        super(message, cause);
    }
}
/**
 * Outcome of a single OCR run: either the recognized text or an error message.
 * <p>
 * Instances are immutable and are created through {@link #success(String)} and
 * {@link #failure(String)}.
 */
public class OcrResult {
    private final boolean success;
    private final String text;
    private final String errorMessage;

    private OcrResult(boolean success, String text, String errorMessage) {
        this.success = success;
        this.text = text;
        this.errorMessage = errorMessage;
    }

    /**
     * Creates a successful result.
     *
     * @param text the recognized text; may be empty if nothing was recognized
     * @return a result for which {@link #isSuccess()} is {@code true}
     */
    public static OcrResult success(String text) {
        return new OcrResult(true, text, null);
    }

    /**
     * Creates a failed result.
     *
     * @param errorMessage description of what went wrong
     * @return a result for which {@link #isSuccess()} is {@code false}
     */
    public static OcrResult failure(String errorMessage) {
        return new OcrResult(false, null, errorMessage);
    }

    /**
     * @return {@code true} if this result was created by {@link #success(String)}
     */
    public boolean isSuccess() {
        return success;
    }

    /**
     * @return the recognized text; empty for failed results (or if {@code null} text was supplied)
     */
    public Optional<String> getText() {
        return Optional.ofNullable(text);
    }

    /**
     * @return the error description; empty for successful results (or if a {@code null} message was supplied)
     */
    public Optional<String> getErrorMessage() {
        return Optional.ofNullable(errorMessage);
    }
}
+ */ +public class OcrService { + private static final Logger LOGGER = LoggerFactory.getLogger(OcrService.class); + private static final String JNA_LIBRARY_PATH = "jna.library.path"; + private static final String TESSDATA_PREFIX = "TESSDATA_PREFIX"; + + // The OCR engine instance + private final Tesseract tesseract; + + /** + * Constructs a new OcrService with default settings. + * Currently uses Tesseract with English language support. + */ + public OcrService() throws OcrException { + configureLibraryPath(); + + try { + this.tesseract = new Tesseract(); + tesseract.setLanguage("eng"); + configureTessdata(); + LOGGER.debug("Initialized OcrService with Tesseract"); + } catch (Exception e) { + throw new OcrException("Failed to initialize OCR engine", e); + } + } + + private void configureLibraryPath() { + if (Platform.isMac()) { + String originalPath = System.getProperty(JNA_LIBRARY_PATH, ""); + if (Platform.isARM()) { + System.setProperty(JNA_LIBRARY_PATH, + originalPath + File.pathSeparator + "/opt/homebrew/lib/"); + } else { + System.setProperty(JNA_LIBRARY_PATH, + originalPath + File.pathSeparator + "/usr/local/cellar/"); + } + } + } + + private void configureTessdata() throws OcrException { + // First, check environment variable + String tessdataPath = System.getenv(TESSDATA_PREFIX); + + if (tessdataPath != null && !tessdataPath.isEmpty()) { + File tessdataDir = new File(tessdataPath); + if (tessdataDir.exists() && tessdataDir.isDirectory()) { + // Tesseract expects the parent directory of tessdata + if (tessdataDir.getName().equals("tessdata")) { + tesseract.setDatapath(tessdataDir.getParent()); + } else { + tesseract.setDatapath(tessdataPath); + } + LOGGER.info("Using tessdata from environment variable: {}", tessdataPath); + return; + } else { + LOGGER.warn("TESSDATA_PREFIX points to non-existent directory: {}", tessdataPath); + } + } + + // Fall back to system locations + String systemPath = findSystemTessdata(); + if (systemPath != null) { + 
tesseract.setDatapath(systemPath); + LOGGER.info("Using system tessdata at: {}", systemPath); + } else { + throw new OcrException("Could not find tessdata directory. Please set TESSDATA_PREFIX environment variable."); + } + } + + private String findSystemTessdata() { + String[] possiblePaths = { + "/usr/local/share", // Homebrew Intel + "/opt/homebrew/share", // Homebrew ARM + "/usr/share" // System + }; + + for (String path : possiblePaths) { + File tessdata = new File(path, "tessdata"); + File engData = new File(tessdata, "eng.traineddata"); + if (tessdata.exists() && engData.exists()) { + return path; // Return parent of tessdata + } + } + + return null; + } + + /** + * Performs OCR on a PDF file and returns the extracted text. + * + * @param pdfPath Path to the PDF file to process + * @return The extracted text, or empty string if no text found + * @throws OcrException if OCR processing fails + */ + public OcrResult performOcr(Path pdfPath) { + // User error - not an exception + if (pdfPath == null) { + LOGGER.warn("PDF path is null"); + return OcrResult.failure("No file path provided"); + } + + File pdfFile = pdfPath.toFile(); + + // User error - not an exception + if (!pdfFile.exists()) { + LOGGER.warn("PDF file does not exist: {}", pdfPath); + return OcrResult.failure("File does not exist: " + pdfPath.getFileName()); + } + + try { + LOGGER.info("Starting OCR for file: {}", pdfFile.getName()); + + String result = tesseract.doOCR(pdfFile); + result = StringUtil.isBlank(result) ? "" : result.trim(); + + LOGGER.info("OCR completed successfully. 
Extracted {} characters", result.length()); + return OcrResult.success(result); + + } catch (TesseractException e) { + // This could be either a user error (corrupt PDF) or our bug + // Log it as error but return as failure, not exception + LOGGER.error("OCR processing failed for file: {}", pdfFile.getName(), e); + return OcrResult.failure("Failed to extract text from PDF: " + e.getMessage()); + } + } +} diff --git a/jablib/src/main/resources/l10n/JabRef_en.properties b/jablib/src/main/resources/l10n/JabRef_en.properties index 433be54d054..f46d3c12cf2 100644 --- a/jablib/src/main/resources/l10n/JabRef_en.properties +++ b/jablib/src/main/resources/l10n/JabRef_en.properties @@ -316,6 +316,14 @@ Extract\ references\ from\ file\ (online)=Extract references from file (online) Extract\ References\ (offline)=Extract References (offline) Extract\ References\ (online)=Extract References (online) +Extract\ text\ (OCR)=Extract text (OCR) +Performing\ OCR...=Performing OCR... +OCR\ Complete=OCR Complete +OCR\ Result=OCR Result +OCR\ failed=OCR failed +No\ text\ was\ found\ in\ the\ PDF.=No text was found in the PDF. +Could\ not\ locate\ the\ PDF\ file\ on\ disk.=Could not locate the PDF file on disk. + Processing...=Processing... Processing\ "%0"...=Processing "%0"... Processing\ PDF(s)=Processing PDF(s)