Skip to content

OCR integration #13313

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -581,3 +581,5 @@ CHANGELOG.html

# some strange gradle/IntelliJ extension
extension 'reporting' property 'baseDirectory'

tessdata/
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,8 @@ private void handleItemMouseClick(LinkedFileViewModel linkedFile, MouseEvent eve
bibEntry,
viewModel,
contextCommandFactory,
multiContextCommandFactory
multiContextCommandFactory,
taskExecutor
);

ContextMenu contextMenu = contextMenuFactory.createForSelection(listView.getSelectionModel().getSelectedItems());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import javafx.collections.ObservableList;
import javafx.scene.control.ContextMenu;
import javafx.scene.control.MenuItem;
import javafx.scene.control.SeparatorMenuItem;

import org.jabref.gui.DialogService;
Expand All @@ -10,7 +11,10 @@
import org.jabref.gui.copyfiles.CopySingleFileAction;
import org.jabref.gui.fieldeditors.LinkedFileViewModel;
import org.jabref.gui.fieldeditors.LinkedFilesEditorViewModel;
import org.jabref.gui.linkedfile.OcrAction;
import org.jabref.gui.preferences.GuiPreferences;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.TaskExecutor;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;

Expand All @@ -25,21 +29,24 @@ public class ContextMenuFactory {
private final LinkedFilesEditorViewModel viewModel;
private final SingleContextCommandFactory singleCommandFactory;
private final MultiContextCommandFactory multiCommandFactory;
private final TaskExecutor taskExecutor;

/**
 * Creates a factory that builds context menus for linked files.
 * <p>
 * All arguments are stored as-is in the corresponding fields.
 *
 * @param dialogService        dialog service; also handed to the OCR action
 * @param preferences          GUI preferences; their file preferences are handed to the OCR action
 * @param databaseContext      database context; also handed to the OCR action
 * @param bibEntry             observable holding the entry the linked files belong to
 * @param viewModel            view model of the linked files editor
 * @param singleCommandFactory builds the command for an action on a single linked file
 * @param multiCommandFactory  builds the command for an action on several linked files
 * @param taskExecutor         executor handed to the OCR action for its background work
 */
public ContextMenuFactory(DialogService dialogService,
                          GuiPreferences preferences,
                          BibDatabaseContext databaseContext,
                          ObservableOptionalValue<BibEntry> bibEntry,
                          LinkedFilesEditorViewModel viewModel,
                          SingleContextCommandFactory singleCommandFactory,
                          MultiContextCommandFactory multiCommandFactory,
                          TaskExecutor taskExecutor) {
    this.dialogService = dialogService;
    this.preferences = preferences;
    this.databaseContext = databaseContext;
    this.bibEntry = bibEntry;
    this.viewModel = viewModel;
    this.singleCommandFactory = singleCommandFactory;
    this.multiCommandFactory = multiCommandFactory;
    this.taskExecutor = taskExecutor;
}

public ContextMenu createForSelection(ObservableList<LinkedFileViewModel> selectedFiles) {
Expand Down Expand Up @@ -86,9 +93,45 @@ private ContextMenu createContextMenuForFile(LinkedFileViewModel linkedFile) {
factory.createMenuItem(StandardActions.DELETE_FILE, singleCommandFactory.build(StandardActions.DELETE_FILE, linkedFile))
);

// Add OCR menu item for PDF files
if (linkedFile.getFile().getFileType().equalsIgnoreCase("pdf")) {
menu.getItems().add(new SeparatorMenuItem());

MenuItem ocrItem = createOcrMenuItem(linkedFile);
menu.getItems().add(ocrItem);
}

return menu;
}

/**
 * Builds the "Extract text (OCR)" entry for a linked file's context menu.
 * <p>
 * Clicking the entry runs an {@link OcrAction} for the file. The entry is disabled
 * whenever that action reports itself as not executable.
 *
 * @param linkedFile view model of the linked file the entry operates on
 * @return the configured menu item
 */
private MenuItem createOcrMenuItem(LinkedFileViewModel linkedFile) {
    MenuItem item = new MenuItem(Localization.lang("Extract text (OCR)"));

    OcrAction action = new OcrAction(
            linkedFile.getFile(),
            databaseContext,
            dialogService,
            preferences.getFilePreferences(),
            taskExecutor);

    // Enabled state mirrors the action's executable flag
    item.disableProperty().bind(action.executableProperty().not());
    item.setOnAction(event -> action.execute());

    return item;
}

@FunctionalInterface
public interface SingleContextCommandFactory {
ContextAction build(StandardActions action, LinkedFileViewModel file);
Expand Down
106 changes: 106 additions & 0 deletions jabgui/src/main/java/org/jabref/gui/linkedfile/OcrAction.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package org.jabref.gui.linkedfile;

import org.jabref.gui.DialogService;
import org.jabref.gui.StateManager;
import org.jabref.gui.actions.Action;
import org.jabref.gui.actions.ActionHelper;
import org.jabref.gui.actions.SimpleCommand;
import org.jabref.logic.util.BackgroundTask;
import org.jabref.logic.util.TaskExecutor;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.ocr.OcrService;
import org.jabref.logic.ocr.OcrException;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.LinkedFile;
import org.jabref.logic.FilePreferences;

import java.nio.file.Path;
import java.util.Optional;

/**
* Action for performing OCR (Optical Character Recognition) on linked PDF files.
* <p>
* This action extracts text content from PDF files that are attached to BibTeX entries.
* It runs the OCR process in a background thread to keep the UI responsive and provides
* user feedback through dialogs and notifications.
* <p>
* The action follows JabRef's command pattern and can be triggered from context menus.
* It includes built-in validation to ensure it's only enabled for PDF files that exist on disk.
*
* @see OcrService
* @see org.jabref.gui.actions.SimpleCommand
*/

// Personal Note: Add more doc in between later

public class OcrAction extends SimpleCommand {

private final LinkedFile linkedFile;
private final BibDatabaseContext databaseContext;
private final DialogService dialogService;
private final FilePreferences filePreferences;
private final TaskExecutor taskExecutor;

public OcrAction(LinkedFile linkedFile,
BibDatabaseContext databaseContext,
DialogService dialogService,
FilePreferences filePreferences,
TaskExecutor taskExecutor) {
this.linkedFile = linkedFile;
this.databaseContext = databaseContext;
this.dialogService = dialogService;
this.filePreferences = filePreferences;
this.taskExecutor = taskExecutor;

// Only executable for existing PDF files
this.executable.set(
linkedFile.getFileType().equalsIgnoreCase("pdf") &&
linkedFile.findIn(databaseContext, filePreferences).isPresent()
);
}

@Override
public void execute() {
Optional<Path> filePath = linkedFile.findIn(databaseContext, filePreferences);

if (filePath.isEmpty()) {
dialogService.showErrorDialogAndWait(
Localization.lang("File not found"),
Localization.lang("Could not locate the PDF file on disk.")
);
return;
}

dialogService.notify(Localization.lang("Performing OCR..."));

BackgroundTask.wrap(() -> {
OcrService ocrService = new OcrService();
return ocrService.performOcr(filePath.get());
})
.onSuccess(extractedText -> {
if (extractedText.isEmpty()) {
dialogService.showInformationDialogAndWait(
Localization.lang("OCR Complete"),
Localization.lang("No text was found in the PDF.")
);
} else {
// For now, just show preview
String preview = extractedText.length() > 1000
? extractedText.substring(0, 1000) + "..."
: extractedText;

dialogService.showInformationDialogAndWait(
Localization.lang("OCR Result"),
preview
);
}
})
.onFailure(exception -> {
dialogService.showErrorDialogAndWait(
Localization.lang("OCR failed"),
exception.getMessage()
);
})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

before add
.showToUser(true) then it will be shown in the UI task list

.executeWith(taskExecutor);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.jabref.gui.fieldeditors.LinkedFileViewModel;
import org.jabref.gui.fieldeditors.LinkedFilesEditorViewModel;
import org.jabref.gui.preferences.GuiPreferences;
import org.jabref.logic.util.TaskExecutor;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.LinkedFile;
Expand Down Expand Up @@ -41,6 +42,7 @@ public class ContextMenuFactoryTest {
private ContextMenuFactory factory;
private ContextMenuFactory.SingleContextCommandFactory singleCommandFactory;
private ContextMenuFactory.MultiContextCommandFactory multiCommandFactory;
private TaskExecutor taskExecutor;

@BeforeAll
public static void initToolkit() {
Expand Down Expand Up @@ -78,7 +80,8 @@ public void setUp() {
bibEntry,
viewModel,
singleCommandFactory,
multiCommandFactory
multiCommandFactory,
taskExecutor
);
}

Expand Down
3 changes: 3 additions & 0 deletions jablib/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ dependencies {
// Required for LocalizationConsistencyTest
testImplementation("org.testfx:testfx-core:4.0.16-alpha")
testImplementation("org.testfx:testfx-junit5:4.0.16-alpha")

// OCR support
implementation("net.sourceforge.tess4j:tess4j:5.15.0")
}
/*
jacoco {
Expand Down
2 changes: 2 additions & 0 deletions jablib/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
exports org.jabref.logic.crawler;
exports org.jabref.logic.git;
exports org.jabref.logic.pseudonymization;
exports org.jabref.logic.ocr;
exports org.jabref.logic.citation.repository;

requires java.base;
Expand Down Expand Up @@ -252,5 +253,6 @@
requires mslinks;
requires org.antlr.antlr4.runtime;
requires org.libreoffice.uno;
requires tess4j;
// endregion
}
28 changes: 28 additions & 0 deletions jablib/src/main/java/org/jabref/logic/ocr/OcrException.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package org.jabref.logic.ocr;

/**
* Exception thrown when OCR operations fail.
* This exception wraps lower-level OCR engine exceptions to provide
* a consistent interface for error handling throughout JabRef.
*/
public class OcrException extends Exception {
Copy link
Member

@subhramit subhramit Jun 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the idea, only drawback I see is this may be hard to maintain/stay consistent with as the project grows or if external contributors wish to add something...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Best is to avoid exceptions being thrown at all, especially for expected possible stupid user behaviour. And if they are being thrown, keep them as informative as possible. The consistent interface is Exception at the end either way.

Copy link
Member

@subhramit subhramit Jun 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Backstory: This was based on my experience in OO - all the JStyle classes and the OO GUI class had a custom error wrapper, OOError. There was also OOResult and OOVoidResult. I still don't know much about them, but for OOError, apart from just wrapping exceptions, it also acted as an interface for localizing and displaying error messages. Unsurprisingly, when I was new, I found these hard to use. So I started by using native exceptions as thrown by the library methods during the CSL project.
It was convenient, and it worked, and people were fine with the inconsistency, so now 50% of OO uses that, 50% doesn't. I mentioned this as some free time refactoring in #11829, but never had the energy to change it.


/**
 * Constructs an OcrException with a message and underlying cause.
 * Both arguments are passed unchanged to {@link Exception#Exception(String, Throwable)}.
 *
 * @param message Descriptive error message
 * @param cause   The underlying exception that caused this error
 */
public OcrException(String message, Throwable cause) {
super(message, cause);
}

/**
 * Constructs an OcrException with only a message and no cause.
 * The message is passed unchanged to {@link Exception#Exception(String)}.
 *
 * @param message Descriptive error message
 */
public OcrException(String message) {
super(message);
}
}
87 changes: 87 additions & 0 deletions jablib/src/main/java/org/jabref/logic/ocr/OcrService.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package org.jabref.logic.ocr;

import java.io.File;
import java.nio.file.Path;

import org.jabref.model.strings.StringUtil;

import com.sun.jna.Platform;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Service for performing Optical Character Recognition (OCR) on PDF files.
* This class provides a high-level interface to OCR functionality,
* abstracting away the specific OCR engine implementation details.
*/
public class OcrService {
private static final Logger LOGGER = LoggerFactory.getLogger(OcrService.class);
private static final String JNA_LIBRARY_PATH = "jna.library.path";
// The OCR engine instance
private final Tesseract tesseract;
Comment on lines +24 to +25
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment is trivial and can be derived directly from the code. It doesn't add any new information about the implementation or reasoning behind using Tesseract.


/**
* Constructs a new OcrService with default settings.
* Currently uses Tesseract with English language support.
*/
public OcrService() {
if (Platform.isMac()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if (Platform.isARM()) {
System.setProperty(JNA_LIBRARY_PATH, JNA_LIBRARY_PATH + File.pathSeparator + "/opt/homebrew/lib/");
} else {
System.setProperty(JNA_LIBRARY_PATH, JNA_LIBRARY_PATH + File.pathSeparator + "/usr/local/cellar/");
}
}
this.tesseract = new Tesseract();

// Configure Tesseract
tesseract.setLanguage("eng");

// TODO: This path needs to be configurable and bundled properly
// For now, we'll use a relative path that works during development
tesseract.setDatapath("tessdata");

LOGGER.debug("Initialized OcrService with Tesseract");
}

/**
* Performs OCR on a PDF file and returns the extracted text.
*
* @param pdfPath Path to the PDF file to process
* @return The extracted text, or empty string if no text found
* @throws OcrException if OCR processing fails
*/
public String performOcr(Path pdfPath) throws OcrException {
// Validate input
if (pdfPath == null) {
throw new OcrException("PDF path cannot be null");
}

File pdfFile = pdfPath.toFile();
if (!pdfFile.exists()) {
throw new OcrException("PDF file does not exist: " + pdfPath);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


try {
LOGGER.info("Starting OCR for file: {}", pdfFile.getName());

// Perform OCR
String result = tesseract.doOCR(pdfFile);

// Clean up the result (remove extra whitespace, etc.)
result = StringUtil.isBlank(result) ? "" : result.trim();

LOGGER.info("OCR completed successfully. Extracted {} characters", result.length());
return result;
} catch (
TesseractException e) {
LOGGER.error("OCR failed for file: {}", pdfFile.getName(), e);
throw new OcrException(
"Failed to perform OCR on file: " + pdfFile.getName() +
". Error: " + e.getMessage(), e
);
}
}
}
8 changes: 8 additions & 0 deletions jablib/src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,14 @@ Extract\ references\ from\ file\ (online)=Extract references from file (online)
Extract\ References\ (offline)=Extract References (offline)
Extract\ References\ (online)=Extract References (online)

Extract\ text\ (OCR)=Extract text (OCR)
Performing\ OCR...=Performing OCR...
OCR\ Complete=OCR Complete
OCR\ Result=OCR Result
OCR\ failed=OCR failed
No\ text\ was\ found\ in\ the\ PDF.=No text was found in the PDF.
Could\ not\ locate\ the\ PDF\ file\ on\ disk.=Could not locate the PDF file on disk.

Processing...=Processing...
Processing\ "%0"...=Processing "%0"...
Processing\ PDF(s)=Processing PDF(s)
Expand Down
Loading