Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

- update to the latest t-engine version #2

Merged
merged 1 commit into from
Sep 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,27 @@ $ docker run -p 8090:8090 -t alfresco/alfresco-transformer-ocr:latest
A sample web page has been created in order to test the transformer is working:

http://localhost:8090

## Alfresco Action integration
In order to use Alfresco Share Rules and Actions, you need to enable the "embed-metadata" action. Once enabled on the repository side, it will appear in Share under the rule's actions:
```
<bean id="embed-metadata" class="org.alfresco.repo.action.executer.ContentMetadataEmbedder" parent="action-executer">
<property name="nodeService">
<ref bean="NodeService" />
</property>
<property name="contentService">
<ref bean="ContentService" />
</property>
<property name="dictionaryService">
<ref bean="dictionaryService" />
</property>
<property name="metadataExtracterRegistry">
<ref bean="metadataExtracterRegistry" />
</property>
<property name="applicableTypes">
<list>
<value>{http://www.alfresco.org/model/content/1.0}content</value>
</list>
</property>
</bean>
```
4 changes: 2 additions & 2 deletions ats-transformer-ocr/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.1.8.RELEASE</version>
<version>2.5.4</version>
</parent>

<properties>
<alfresco.transformer.base.version>2.1.0</alfresco.transformer.base.version>
<alfresco.transformer.base.version>2.5.4-SNAPSHOT</alfresco.transformer.base.version>
</properties>

<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.alfresco.transformer.fs.FileManager;
import org.alfresco.transformer.logging.LogEntry;
import org.alfresco.transformer.probes.ProbeTestTransform;
import org.alfresco.transformer.transformers.OcrTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
Expand Down Expand Up @@ -74,17 +75,23 @@ public class TransformerController extends AbstractTransformerController {

private static final Logger logger = LoggerFactory.getLogger(TransformerController.class);

private final PdfToOcrdPdfTransformerExecutor javaExecutor;
private final OcrTransformer ocrTransfromer;

@Autowired
private PdfToOcrdPdfTransformerExecutor javaExecutor;
public TransformerController(PdfToOcrdPdfTransformerExecutor javaExecutor, OcrTransformer ocrTransfromer) {
this.javaExecutor = javaExecutor;
this.ocrTransfromer = ocrTransfromer;
}

@Override
@Override
public String getTransformerName() {
return "ocr";
}

@Override
public String version() {
return "1.0";
return "1.1";
}

@Override
Expand All @@ -98,66 +105,16 @@ protected void executeTransformCommand(File sourceFile, File targetFile) {
}
};
}

@PostMapping(value = "/transform", consumes = MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<Resource> transform(HttpServletRequest request,
@RequestParam("file") final MultipartFile sourceMultipartFile,
@RequestParam("sourceMimetype") final String sourceMimetype,
@RequestParam("targetExtension") final String targetExtension,
@RequestParam("targetMimetype") final String targetMimetype,

@RequestParam(value = "timeout", required = false) final Long timeout,
@RequestParam(value = "testDelay", required = false) final Long testDelay) {

// We know the target extension and MIME type
// Let's make a random unique filename for temporary storage to hand back to the ATS Transform Router
final String targetFilename = FileManager.createTargetFileName(sourceMultipartFile.getOriginalFilename(), targetExtension);

// Inform the probe of pending transformation
// This allows ATS to track usage statistics
this.getProbeTestTransform().incrementTransformerCount();

// Using parameters, get Java IO File references all squared away
// This means downloading the source file to the local file system
// It would be better if you stream straight from the source and not make a copy
final File sourceFile = FileManager.createSourceFile(request, sourceMultipartFile);
final File targetFile = FileManager.createTargetFile(request, targetFilename);
// Both files are deleted by TransformInterceptor.afterCompletion

// Store all the extra parameters in a Map
final Map<String, String> transformOptions = this.createTransformOptions();

// Make a decision on what sub-transformer or algorithm to use for the transformation
final String transform = this.getTransformerName(sourceFile, sourceMimetype, targetMimetype, transformOptions);

// Execute the transformation
this.javaExecutor.call(sourceFile, targetFile, transform, targetMimetype);

// Prepare the response
final ResponseEntity<Resource> body = FileManager.createAttachment(targetFilename, targetFile);

// Some logging overhead
LogEntry.setTargetSize(targetFile.length());
long time = LogEntry.setStatusCodeAndMessage(OK.value(), "Success");
time += LogEntry.addDelay(testDelay);

// Inform the probe of the time taken for the transformation
// This allows ATS to track duration statistics
this.getProbeTestTransform().recordTransformTime(time);

return body;
}


@Override
public void processTransform(final File sourceFile, final File targetFile,
final String sourceMimetype, final String targetMimetype,
final Map<String, String> transformOptions, final Long timeout) {
logger.debug("Processing request with: sourceFile '{}', targetFile '{}', transformOptions" +
" '{}', timeout {} ms", sourceFile, targetFile, transformOptions, timeout);

final String transform = this.getTransformerName(sourceFile, sourceMimetype, targetMimetype, transformOptions);

this.javaExecutor.call(sourceFile, targetFile, transform, targetMimetype);
/**
 * Dispatches a transform request to the matching implementation.
 *
 * <p>When {@code transformName} equals {@code "ocrembedded"} the request is handed to
 * {@code ocrTransfromer.embedMetadata(...)}; for every other name (including {@code null})
 * it is handed to {@code javaExecutor.call(...)}.
 *
 * @param transformName    name of the transform to run; selects the branch taken
 * @param sourceMimetype   source MIME type; only forwarded on the "ocrembedded" path
 * @param targetMimetype   target MIME type; forwarded on both paths
 * @param transformOptions extra options; only forwarded on the "ocrembedded" path
 * @param sourceFile       file to read from
 * @param targetFile       file to write the result to
 */
public void transformImpl(String transformName, String sourceMimetype, String targetMimetype,
        Map<String, String> transformOptions, File sourceFile, File targetFile) {

    final boolean embedRequested = "ocrembedded".equals(transformName);

    if (!embedRequested) {
        // Execute the transformation
        this.javaExecutor.call(sourceFile, targetFile, transformName, targetMimetype);
        return;
    }

    ocrTransfromer.embedMetadata(transformName, sourceMimetype, targetMimetype, transformOptions,
            sourceFile, targetFile);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,17 @@

import org.alfresco.transform.exceptions.TransformException;
import org.alfresco.transformer.command.StreamGobbler;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpStatus;
import org.springframework.stereotype.Component;

@Component
public class PdfToOcrdPdfTransformerExecutor implements JavaExecutor
{

@Autowired
public PdfToOcrdPdfTransformerExecutor()
{
}

@Override
public String getTransformerId() {
return "ocr";
}

@Override
public void call(File sourceFile, File targetFile, String... args) throws TransformException
{
Expand Down Expand Up @@ -76,5 +74,4 @@ private static String getMessage(Exception e)
{
return e.getMessage() == null ? e.getClass().getSimpleName() : e.getMessage();
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.alfresco.transformer.transformers;

import java.io.File;
import java.util.Map;

import org.alfresco.transformer.executors.PdfToOcrdPdfTransformerExecutor;
import org.alfresco.transformer.executors.Transformer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

/**
 * Spring component implementing {@link Transformer} under the identifier {@value #ID}.
 *
 * <p>The class holds no logic of its own: every request is passed on to the injected
 * {@link PdfToOcrdPdfTransformerExecutor}.
 */
@Component
public class OcrTransformer implements Transformer {

    /** Identifier reported by {@link #getTransformerId()}. */
    public static final String ID = "ocrembedded";

    /** Executor that performs the actual work; supplied through the constructor. */
    private final PdfToOcrdPdfTransformerExecutor executor;

    /**
     * Creates the transformer around the given executor.
     *
     * @param executor executor that every {@code embedMetadata} call is delegated to
     */
    @Autowired
    public OcrTransformer(PdfToOcrdPdfTransformerExecutor executor) {
        this.executor = executor;
    }

    /**
     * @return the constant {@link #ID}
     */
    @Override
    public String getTransformerId() {
        return ID;
    }

    /**
     * Delegates to the executor's {@code call}, passing the source file, the target file,
     * {@code transformName} and {@code targetMimetype}.
     *
     * <p>{@code sourceMimetype} and {@code transformOptions} are accepted but not forwarded.
     *
     * @param transformName    forwarded to the executor as its first extra argument
     * @param sourceMimetype   not used by this implementation
     * @param targetMimetype   forwarded to the executor as its second extra argument
     * @param transformOptions not used by this implementation
     * @param sourceFile       file handed to the executor as the source
     * @param targetFile       file handed to the executor as the target
     */
    @Override
    public void embedMetadata(String transformName, String sourceMimetype, String targetMimetype,
            Map<String, String> transformOptions, File sourceFile, File targetFile) {
        this.executor.call(sourceFile, targetFile, transformName, targetMimetype);
    }
}
12 changes: 12 additions & 0 deletions ats-transformer-ocr/src/main/resources/engine_config.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
{
"transformOptions": {
"embedMetadata": [
{ "value": { "name": "metadata" } }
]
},
"transformers": [
{
"transformerName": "ocr",
Expand All @@ -7,6 +12,13 @@
],
"transformOptions": [
]
},
{
"transformerName": "ocrembedded",
"supportedSourceAndTargetList": [
{ "sourceMediaType": "application/pdf", "targetMediaType": "alfresco-metadata-embed" }
],
"transformOptions": ["embedMetadata"]
}
]
}