From df3954c87913c4d9ec029bfd57bddf2db53f2ad4 Mon Sep 17 00:00:00 2001 From: Benjamin Knoll Date: Thu, 3 Oct 2024 12:14:44 -0500 Subject: [PATCH] Adding REST rtf only pipeline, cleaning up Java warnings, adjusting to MTAP changes --- .gitignore | 3 ++ biomedicus_client/pyproject.toml | 5 +- java/build.gradle | 2 +- .../acronym/AcronymDetectorProcessor.java | 13 ++++- .../biomedicus/acronym/AlignmentModel.java | 1 - .../OrthographicAcronymModelTrainer.java | 3 -- .../acronym/RocksDBSenseVectors.java | 4 +- .../biomedicus/acronym/WordVectorSpace.java | 1 + .../common/tokenization/Tokenizer.java | 6 ++- .../umn/biomedicus/common/tuples/WordCap.java | 6 --- .../common/utilities/RocksToSLF4JLogger.java | 11 +---- .../concepts/ConceptDictionaryBuilder.java | 8 +++- .../biomedicus/concepts/ConceptsUtility.java | 47 +++++++++---------- .../concepts/DictionaryConceptDetector.java | 8 ++-- .../concepts/RocksDbConceptDictionary.java | 6 ++- .../modification/ModificationDetector.java | 6 +-- .../normalization/NormalizationProcessor.java | 2 +- .../normalization/NormalizerModelBuilder.java | 8 +--- .../normalization/RocksDBNormalizerModel.java | 3 +- .../main/java/edu/umn/biomedicus/rtf/RTF.java | 3 +- .../edu/umn/biomedicus/rtf/RtfProcessor.java | 5 +- .../biomedicus/rtf/reader/KeywordAction.java | 1 - .../rtf/reader/RtfParserFactory.java | 6 +-- .../tagging/tnt/RocksDbDataStoreFactory.java | 6 ++- .../tagging/tnt/TntPosTaggerProcessor.java | 2 +- .../tagging/tnt/TntTrainerProcessor.java | 2 +- .../utilities/PtbReaderProcessor.java | 2 +- python/biomedicus/cli.py | 5 +- .../deployment/confs/biomedicus_deploy.yml | 1 - .../deployment/confs/rtf_to_text_deploy.yml | 1 - .../deployment/confs/scaleout_deploy.yml | 1 - python/biomedicus/pipeline_service.py | 30 +++++++++++- python/tests/conftest.py | 2 + python/tests/deployment/test_rest_e2e.py | 45 ++++++++++++++++-- tools/docker/gateway.yml | 2 + tools/docker/rest_e2e.sh | 1 + 36 files changed, 161 insertions(+), 97 deletions(-) diff --git a/.gitignore b/.gitignore index 2de2cbf5..eea5fa0b 100644 --- a/.gitignore +++ b/.gitignore @@ -385,3 +385,6 @@ gradle-app.setting Pipfile Pipfile.lock test_results.yml + +# Visual Studio Code +.vscode diff --git a/biomedicus_client/pyproject.toml b/biomedicus_client/pyproject.toml index bad3adb6..90717ad6 100644 --- a/biomedicus_client/pyproject.toml +++ b/biomedicus_client/pyproject.toml @@ -36,10 +36,7 @@ classifiers = [ 'Topic :: Text Processing :: Linguistic' ] dependencies = [ - "mtap>=1.4.2", - "grpcio==1.66.1", - "grpcio-health-checking==1.66.1", - "grpcio-status==1.66.1", + "mtap>1.4.2", "tqdm==4.66.5", "importlib_resources==6.4.0", "pyyaml==6.0.2" diff --git a/java/build.gradle b/java/build.gradle index 11a740d0..3f8fa880 100644 --- a/java/build.gradle +++ b/java/build.gradle @@ -69,7 +69,7 @@ repositories { dependencies { implementation group: 'org.jetbrains', name: 'annotations', version: '24.1.0' - implementation group: 'edu.umn.nlpie', name: 'mtap', version: '1.4.0' + implementation group: 'edu.umn.nlpie', name: 'mtap', version: '1.4.3-SNAPSHOT' implementation group: 'org.slf4j', name: 'slf4j-api', version: '2.0.16' implementation group: 'args4j', name: 'args4j', version: '2.37' diff --git a/java/src/main/java/edu/umn/biomedicus/acronym/AcronymDetectorProcessor.java b/java/src/main/java/edu/umn/biomedicus/acronym/AcronymDetectorProcessor.java index 9c44f98b..79151911 100644 --- a/java/src/main/java/edu/umn/biomedicus/acronym/AcronymDetectorProcessor.java +++ b/java/src/main/java/edu/umn/biomedicus/acronym/AcronymDetectorProcessor.java @@ -23,7 +23,6 @@ import edu.umn.biomedicus.common.tokenization.WhitespaceTokenizer; import edu.umn.biomedicus.common.tuples.Pair; import edu.umn.biomedicus.serialization.YamlSerialization; -import edu.umn.nlpie.mtap.MTAP; import edu.umn.nlpie.mtap.common.JsonObject; import edu.umn.nlpie.mtap.common.JsonObjectBuilder; import edu.umn.nlpie.mtap.model.*; @@ -277,7 +276,7 @@ void writeToDirectory(Path outputDir, protected void process( @NotNull Document document, @NotNull JsonObject params, - @NotNull JsonObjectBuilder result + @NotNull JsonObjectBuilder result ) { LOGGER.debug("Detecting acronyms in a document."); Boolean labelOtherSenses = params.getBooleanValue("label_other_senses"); @@ -320,6 +319,15 @@ protected void process( } } + @Override + public void shutdown() { + try { + senseVectors.close(); + } catch (IOException e) { + LOGGER.error("Error closing sense vectors dictionary", e); + } + } + private boolean allExcluded(List posTags) { return posTags.stream() .map(tagLabel -> PartsOfSpeech.forTag(tagLabel.getStringValue("tag"))) @@ -480,6 +488,7 @@ public AcronymDetectorProcessor build() throws IOException { LOGGER.info("Loading acronym vector space: {}", vectorSpace); WordVectorSpace wordVectorSpace = WordVectorSpace.load(vectorSpace); LOGGER.info("Loading acronym sense map: {}. inMemory = {}", senseMap, sensesInMemory); + @SuppressWarnings("resource") // This is closed when the processors is shutdown. SenseVectors senseVectors = new RocksDBSenseVectors(senseMap, false) .inMemory(sensesInMemory); AlignmentModel alignment = null; diff --git a/java/src/main/java/edu/umn/biomedicus/acronym/AlignmentModel.java b/java/src/main/java/edu/umn/biomedicus/acronym/AlignmentModel.java index 68ac8bc9..4dd65520 100644 --- a/java/src/main/java/edu/umn/biomedicus/acronym/AlignmentModel.java +++ b/java/src/main/java/edu/umn/biomedicus/acronym/AlignmentModel.java @@ -42,7 +42,6 @@ class AlignmentModel { private static final Logger LOGGER = LoggerFactory.getLogger(AlignmentModel.class); private List longforms; private boolean caseSensitive; - private Map cachedScores = new HashMap<>(); private AlignmentModel(List longforms, boolean caseSensitive) { this.longforms = longforms; diff --git a/java/src/main/java/edu/umn/biomedicus/acronym/OrthographicAcronymModelTrainer.java b/java/src/main/java/edu/umn/biomedicus/acronym/OrthographicAcronymModelTrainer.java index 8a03a406..3ff29135 100644 --- a/java/src/main/java/edu/umn/biomedicus/acronym/OrthographicAcronymModelTrainer.java +++ b/java/src/main/java/edu/umn/biomedicus/acronym/OrthographicAcronymModelTrainer.java @@ -35,7 +35,6 @@ public class OrthographicAcronymModelTrainer { private static final double discounting = .9; private final boolean caseSensitive; private final int[] symbols; - private final int[] chars; private final double[][][] longformProbs; private final double[][][] abbrevProbs; /** @@ -50,8 +49,6 @@ public OrthographicAcronymModelTrainer(boolean caseSensitive) { this.caseSensitive = caseSensitive; symbols = caseSensitive ? OrthographicAcronymModel.CASE_SENS_SYMBOLS : OrthographicAcronymModel.CASE_INSENS_SYMBOLS; - chars = caseSensitive ? OrthographicAcronymModel.CASE_SENS_CHARS - : OrthographicAcronymModel.CASE_INSENS_CHARS; longformProbs = new double[symbols.length][symbols.length][symbols.length]; abbrevProbs = new double[symbols.length][symbols.length][symbols.length]; longformsLower = new HashSet<>(); diff --git a/java/src/main/java/edu/umn/biomedicus/acronym/RocksDBSenseVectors.java b/java/src/main/java/edu/umn/biomedicus/acronym/RocksDBSenseVectors.java index fb228a3f..ccb955b5 100644 --- a/java/src/main/java/edu/umn/biomedicus/acronym/RocksDBSenseVectors.java +++ b/java/src/main/java/edu/umn/biomedicus/acronym/RocksDBSenseVectors.java @@ -33,7 +33,9 @@ public RocksDBSenseVectors(Path path, boolean forWriting) { RocksDB.loadLibrary(); if (forWriting) { - try (Options options = new Options().setCreateIfMissing(true).prepareForBulkLoad()) { + try (Options options = new Options()) { + options.setCreateIfMissing(true); + options.prepareForBulkLoad(); rocksDB = RocksDB.open(options, path.toString()); } catch (RocksDBException e) { throw new RuntimeException(e); diff --git a/java/src/main/java/edu/umn/biomedicus/acronym/WordVectorSpace.java b/java/src/main/java/edu/umn/biomedicus/acronym/WordVectorSpace.java index 88788fce..0e35a0b1 100644 --- a/java/src/main/java/edu/umn/biomedicus/acronym/WordVectorSpace.java +++ b/java/src/main/java/edu/umn/biomedicus/acronym/WordVectorSpace.java @@ -51,6 +51,7 @@ public class WordVectorSpace { * Default weighting function is sigmoid that decreases with distance (to 0.5 at maxDist) * Need to cast to Serializable to save it */ + @SuppressWarnings("unchecked") private static final BiFunction DIST_WEIGHT = (BiFunction & Serializable) (dist, maxDist) -> 1.0 / (1.0 + Math.exp(SLOPE * (Math.abs(dist) - maxDist))); diff --git a/java/src/main/java/edu/umn/biomedicus/common/tokenization/Tokenizer.java b/java/src/main/java/edu/umn/biomedicus/common/tokenization/Tokenizer.java index 93bc5760..13a07c37 100644 --- a/java/src/main/java/edu/umn/biomedicus/common/tokenization/Tokenizer.java +++ b/java/src/main/java/edu/umn/biomedicus/common/tokenization/Tokenizer.java @@ -242,7 +242,11 @@ private static List loadUnitsList() { } } InputStream is = Tokenizer.class.getResourceAsStream("unitsList.txt"); - return new BufferedReader(new InputStreamReader(is)).lines().collect(Collectors.toList()); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(is))) { + return reader.lines().collect(Collectors.toList()); + } catch (IOException e) { + throw new IllegalStateException("Failed to load units list.", e); + } } /** diff --git a/java/src/main/java/edu/umn/biomedicus/common/tuples/WordCap.java b/java/src/main/java/edu/umn/biomedicus/common/tuples/WordCap.java index 4fdcf4a5..bf3095c2 100644 --- a/java/src/main/java/edu/umn/biomedicus/common/tuples/WordCap.java +++ b/java/src/main/java/edu/umn/biomedicus/common/tuples/WordCap.java @@ -18,7 +18,6 @@ import org.jetbrains.annotations.Nullable; import java.io.Serializable; -import java.util.regex.Pattern; /** * Represents a pair of word and isCapitalized. @@ -33,11 +32,6 @@ public class WordCap implements Comparable, Serializable { */ private static final long serialVersionUID = -5981094615088473604L; - /** - * Pattern which matches digits. - */ - private static final Pattern DIGITS = Pattern.compile("\\d"); - /** * The word in the word-capitalization. */ diff --git a/java/src/main/java/edu/umn/biomedicus/common/utilities/RocksToSLF4JLogger.java b/java/src/main/java/edu/umn/biomedicus/common/utilities/RocksToSLF4JLogger.java index feda0797..b6d62378 100644 --- a/java/src/main/java/edu/umn/biomedicus/common/utilities/RocksToSLF4JLogger.java +++ b/java/src/main/java/edu/umn/biomedicus/common/utilities/RocksToSLF4JLogger.java @@ -16,22 +16,15 @@ package edu.umn.biomedicus.common.utilities; -import org.rocksdb.DBOptions; import org.rocksdb.InfoLogLevel; import org.rocksdb.Logger; -import org.rocksdb.Options; public class RocksToSLF4JLogger extends Logger { private final org.slf4j.Logger slf4jLogger; - public RocksToSLF4JLogger(Options options, org.slf4j.Logger slf4jLogger) { - super(options); - this.slf4jLogger = slf4jLogger; - } - - public RocksToSLF4JLogger(DBOptions dboptions, org.slf4j.Logger slf4jLogger) { - super(dboptions); + public RocksToSLF4JLogger(InfoLogLevel logLevel, org.slf4j.Logger slf4jLogger) { + super(logLevel); this.slf4jLogger = slf4jLogger; } diff --git a/java/src/main/java/edu/umn/biomedicus/concepts/ConceptDictionaryBuilder.java b/java/src/main/java/edu/umn/biomedicus/concepts/ConceptDictionaryBuilder.java index 7075360c..5cfaa079 100644 --- a/java/src/main/java/edu/umn/biomedicus/concepts/ConceptDictionaryBuilder.java +++ b/java/src/main/java/edu/umn/biomedicus/concepts/ConceptDictionaryBuilder.java @@ -223,7 +223,9 @@ private void doWork() throws IOException { } } } - try (Options options = new Options().setCreateIfMissing(true).prepareForBulkLoad()) { + try (Options options = new Options()) { + options.setCreateIfMissing(true); + options.prepareForBulkLoad(); try (RocksDB phrases = RocksDB.open(options, dbPath.resolve("phrases").toString()); RocksDB lowercase = RocksDB.open(options, dbPath.resolve("lowercase").toString())) { int wrote = 0; @@ -306,8 +308,10 @@ private void doWork() throws IOException { } int wrote = 0; - try (Options options = new Options().setCreateIfMissing(true).prepareForBulkLoad(); + try (Options options = new Options(); RocksDB normsDb = RocksDB.open(options, dbPath.resolve("norms").toString())) { + options.setCreateIfMissing(true); + options.prepareForBulkLoad(); for (Entry> entry : map.entrySet()) { List suiCuiTuis = entry.getValue(); byte[] suiCuiTuiBytes = getBytes(suiCuiTuis); diff --git a/java/src/main/java/edu/umn/biomedicus/concepts/ConceptsUtility.java b/java/src/main/java/edu/umn/biomedicus/concepts/ConceptsUtility.java index 38fc9d9a..11c616f9 100644 --- a/java/src/main/java/edu/umn/biomedicus/concepts/ConceptsUtility.java +++ b/java/src/main/java/edu/umn/biomedicus/concepts/ConceptsUtility.java @@ -16,7 +16,6 @@ package edu.umn.biomedicus.concepts; -import edu.umn.biomedicus.common.config.Config; import org.jetbrains.annotations.Nullable; import org.kohsuke.args4j.CmdLineException; import org.kohsuke.args4j.CmdLineParser; @@ -33,41 +32,37 @@ * Utility */ public class ConceptsUtility { - @Option( - name = "--db-path", - metaVar = "PATH_TO", - usage = "Optional override path to the concepts dictionary." - ) + @Option(name = "--db-path", metaVar = "PATH_TO", usage = "Optional override path to the concepts dictionary.") private @Nullable Path dbPath = null; public void listenToConsole() throws IOException, RocksDBException { - Scanner scanner = new Scanner(System.in); - System.out.println("Reading concepts from database"); + try (Scanner scanner = new Scanner(System.in)) { + System.out.println("Reading concepts from database"); - DictionaryConceptDetector.ConceptsOptions conceptsOptions = new DictionaryConceptDetector.ConceptsOptions(); - conceptsOptions.setDbPath(dbPath); - conceptsOptions.setInMemory(true); - ConceptDictionary dictionary = DictionaryConceptDetector.loadConceptsDictionary(conceptsOptions); + DictionaryConceptDetector.ConceptsOptions conceptsOptions = new DictionaryConceptDetector.ConceptsOptions(); + conceptsOptions.setDbPath(dbPath); + conceptsOptions.setInMemory(true); + ConceptDictionary dictionary = DictionaryConceptDetector.loadConceptsDictionary(conceptsOptions); - while (true) { - System.out.print("Q: "); - String query = scanner.nextLine(); - if ("!q".equals(query)) { - return; - } else if (CUI_PATTERN.matcher(query).matches()) { - for (PhraseConcept phraseConcept : dictionary.withCui(new CUI(query))) { - System.out.println(phraseConcept.toString()); - } - } else { - System.out.println("Searching for " + query); - for (PhraseConcept phraseConcept : dictionary.withWord(query)) { - System.out.println(phraseConcept.toString()); + while (true) { + System.out.print("Q: "); + String query = scanner.nextLine(); + if ("!q".equals(query)) { + return; + } else if (CUI_PATTERN.matcher(query).matches()) { + for (PhraseConcept phraseConcept : dictionary.withCui(new CUI(query))) { + System.out.println(phraseConcept.toString()); + } + } else { + System.out.println("Searching for " + query); + for (PhraseConcept phraseConcept : dictionary.withWord(query)) { + System.out.println(phraseConcept.toString()); + } } } } } - public static void main(String[] args) { ConceptsUtility conceptsUtility = new ConceptsUtility(); CmdLineParser parser = new CmdLineParser(conceptsUtility); diff --git a/java/src/main/java/edu/umn/biomedicus/concepts/DictionaryConceptDetector.java b/java/src/main/java/edu/umn/biomedicus/concepts/DictionaryConceptDetector.java index 9c16a127..4bf80ccf 100644 --- a/java/src/main/java/edu/umn/biomedicus/concepts/DictionaryConceptDetector.java +++ b/java/src/main/java/edu/umn/biomedicus/concepts/DictionaryConceptDetector.java @@ -192,7 +192,7 @@ public static void main(String[] args) { protected void process( @NotNull Document document, @NotNull JsonObject params, - @NotNull JsonObjectBuilder result + @NotNull JsonObjectBuilder result ) { LOGGER.debug("Finding concepts in document."); @@ -362,8 +362,10 @@ public void run() { sentenceNorms.add(norm); } } else { - for (GenericLabel genericLabel : norms.inside(sentence)) { - sentenceNorms.add(genericLabel.getStringValue("norm")); + if (norms != null) { + for (GenericLabel genericLabel : norms.inside(sentence)) { + sentenceNorms.add(genericLabel.getStringValue("norm")); + } } } diff --git a/java/src/main/java/edu/umn/biomedicus/concepts/RocksDbConceptDictionary.java b/java/src/main/java/edu/umn/biomedicus/concepts/RocksDbConceptDictionary.java index 22731d0f..6069663e 100644 --- a/java/src/main/java/edu/umn/biomedicus/concepts/RocksDbConceptDictionary.java +++ b/java/src/main/java/edu/umn/biomedicus/concepts/RocksDbConceptDictionary.java @@ -21,6 +21,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import edu.umn.biomedicus.common.utilities.RocksToSLF4JLogger; + import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; @@ -72,7 +74,9 @@ static List toList(byte[] bytes) { public static ConceptDictionary loadModel(Path dbPath, boolean inMemory) throws RocksDBException, IOException { RocksDB.loadLibrary(); - try (Options options = new Options().setInfoLogLevel(InfoLogLevel.ERROR_LEVEL)) { + try (Options options = new Options()) { + options.setInfoLogLevel(InfoLogLevel.ERROR_LEVEL); + options.setLogger(new RocksToSLF4JLogger(InfoLogLevel.ERROR_LEVEL, LOGGER)); LOGGER.info("Opening concepts dictionary: {}. inMemory = {}.", dbPath, inMemory); RocksDB phrasesDB = RocksDB.openReadOnly(options, dbPath.resolve("phrases").toString()); diff --git a/java/src/main/java/edu/umn/biomedicus/modification/ModificationDetector.java b/java/src/main/java/edu/umn/biomedicus/modification/ModificationDetector.java index 3df38eb3..98db7df9 100644 --- a/java/src/main/java/edu/umn/biomedicus/modification/ModificationDetector.java +++ b/java/src/main/java/edu/umn/biomedicus/modification/ModificationDetector.java @@ -221,7 +221,7 @@ public static void main(String[] args) { } @Override - protected void process(@NotNull Document document, @NotNull JsonObject params, @NotNull JsonObjectBuilder result) { + protected void process(@NotNull Document document, @NotNull JsonObject params, @NotNull JsonObjectBuilder result) { String termIndexName = params.getStringValue("terms_index"); if (termIndexName == null) { termIndexName = "umls_terms"; @@ -249,7 +249,7 @@ protected void process(@NotNull Document document, @NotNull JsonObject params, @ Pair> searchResult = CUES.searchLeft(contextList); if (searchResult != null) { - List cues = searchResult.second().stream().map(span -> { + searchResult.second().stream().map(span -> { GenericLabel cue = GenericLabel.withSpan(span).build(); cueLabeler.add(cue); return cue; @@ -274,7 +274,7 @@ protected void process(@NotNull Document document, @NotNull JsonObject params, @ searchResult = CUES.searchRight(contextList); if (searchResult != null) { - List cues = searchResult.second().stream().map(span -> { + searchResult.second().stream().map(span -> { GenericLabel cue = GenericLabel.withSpan(span).build(); cueLabeler.add(cue); return cue; diff --git a/java/src/main/java/edu/umn/biomedicus/normalization/NormalizationProcessor.java b/java/src/main/java/edu/umn/biomedicus/normalization/NormalizationProcessor.java index a6480942..3fd30a7c 100644 --- a/java/src/main/java/edu/umn/biomedicus/normalization/NormalizationProcessor.java +++ b/java/src/main/java/edu/umn/biomedicus/normalization/NormalizationProcessor.java @@ -112,7 +112,7 @@ public static void main(String[] args) { protected void process( @NotNull Document document, @NotNull JsonObject params, - @NotNull JsonObjectBuilder result + @NotNull JsonObjectBuilder result ) { LOGGER.debug("Normalizing tokens in a document."); LabelIndex posTagIndex = document.getLabelIndex("pos_tags"); diff --git a/java/src/main/java/edu/umn/biomedicus/normalization/NormalizerModelBuilder.java b/java/src/main/java/edu/umn/biomedicus/normalization/NormalizerModelBuilder.java index ebf76d25..e989bcd7 100644 --- a/java/src/main/java/edu/umn/biomedicus/normalization/NormalizerModelBuilder.java +++ b/java/src/main/java/edu/umn/biomedicus/normalization/NormalizerModelBuilder.java @@ -20,13 +20,10 @@ import org.kohsuke.args4j.Argument; import org.kohsuke.args4j.CmdLineException; import org.kohsuke.args4j.CmdLineParser; -import org.kohsuke.args4j.Option; import org.kohsuke.args4j.spi.PathOptionHandler; import org.rocksdb.Options; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Files; @@ -64,8 +61,6 @@ public final class NormalizerModelBuilder { */ public static final int LRAGR_BASE_FORM = 4; - private static final Logger LOGGER = LoggerFactory.getLogger(NormalizerModelBuilder.class); - private static final int IGNORE_WHEN_LONGER = 100; private static final Map LRAGR_TO_PENN; @@ -191,7 +186,8 @@ public void doWork() throws IOException { System.out.println("Creating normalizer db from " + builder.size() + " terms"); - try (Options options = new Options().setCreateIfMissing(true).prepareForBulkLoad()) { + try (Options options = new Options()) { + options.setCreateIfMissing(true).prepareForBulkLoad(); try (RocksDB rocksDB = RocksDB.open(options, dbPath.toString())) { builder.forEach((tp, ts) -> { try { diff --git a/java/src/main/java/edu/umn/biomedicus/normalization/RocksDBNormalizerModel.java b/java/src/main/java/edu/umn/biomedicus/normalization/RocksDBNormalizerModel.java index 2aa0fdee..1c48bc36 100644 --- a/java/src/main/java/edu/umn/biomedicus/normalization/RocksDBNormalizerModel.java +++ b/java/src/main/java/edu/umn/biomedicus/normalization/RocksDBNormalizerModel.java @@ -38,7 +38,8 @@ public static RocksDBNormalizerModel loadModel(Path dbPath) { RocksDBNormalizerModel(Path dbPath) { RocksDB.loadLibrary(); - try (Options options = new Options().setInfoLogLevel(InfoLogLevel.ERROR_LEVEL)) { + try (Options options = new Options()) { + options.setInfoLogLevel(InfoLogLevel.ERROR_LEVEL); db = RocksDB.openReadOnly(options, dbPath.toString()); } catch (RocksDBException e) { throw new RuntimeException(e); diff --git a/java/src/main/java/edu/umn/biomedicus/rtf/RTF.java b/java/src/main/java/edu/umn/biomedicus/rtf/RTF.java index 5f9367c8..ba5f7a09 100644 --- a/java/src/main/java/edu/umn/biomedicus/rtf/RTF.java +++ b/java/src/main/java/edu/umn/biomedicus/rtf/RTF.java @@ -46,7 +46,6 @@ public static RtfParserFactory getFactory() throws IOException { .getKeywordActionsAsMap(); PropertiesDescription propertiesDescription = PropertiesDescription .loadFromFile("edu/umn/biomedicus/rtf/PropertiesDescription.xml"); - return new RtfParserFactory(controlKeywordsDescription, keywordActionMap, - propertiesDescription); + return new RtfParserFactory(keywordActionMap, propertiesDescription); } } diff --git a/java/src/main/java/edu/umn/biomedicus/rtf/RtfProcessor.java b/java/src/main/java/edu/umn/biomedicus/rtf/RtfProcessor.java index aefca7e2..50b5494f 100644 --- a/java/src/main/java/edu/umn/biomedicus/rtf/RtfProcessor.java +++ b/java/src/main/java/edu/umn/biomedicus/rtf/RtfProcessor.java @@ -28,8 +28,6 @@ import org.jetbrains.annotations.NotNull; import org.kohsuke.args4j.CmdLineException; import org.kohsuke.args4j.CmdLineParser; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; @@ -55,7 +53,6 @@ description = "Rtf underlined formatting."), }) public class RtfProcessor extends EventProcessor { - private static final Logger LOGGER = LoggerFactory.getLogger(RtfProcessor.class); private final RtfParserFactory factory; @@ -80,7 +77,7 @@ public static void main(String[] args) { } @Override - public void process(@NotNull Event event, @NotNull JsonObject params, @NotNull JsonObjectBuilder result) { + public void process(@NotNull Event event, @NotNull JsonObject params, @NotNull JsonObjectBuilder result) { String binaryDataName = params.getStringValue("binary_data_name"); if (binaryDataName == null) { binaryDataName = "rtf"; diff --git a/java/src/main/java/edu/umn/biomedicus/rtf/reader/KeywordAction.java b/java/src/main/java/edu/umn/biomedicus/rtf/reader/KeywordAction.java index e4318e70..227bce12 100644 --- a/java/src/main/java/edu/umn/biomedicus/rtf/reader/KeywordAction.java +++ b/java/src/main/java/edu/umn/biomedicus/rtf/reader/KeywordAction.java @@ -16,7 +16,6 @@ package edu.umn.biomedicus.rtf.reader; -import edu.umn.biomedicus.rtf.exc.RtfReaderException; import org.jetbrains.annotations.Nullable; import java.io.IOException; diff --git a/java/src/main/java/edu/umn/biomedicus/rtf/reader/RtfParserFactory.java b/java/src/main/java/edu/umn/biomedicus/rtf/reader/RtfParserFactory.java index 88d1689e..925d1999 100644 --- a/java/src/main/java/edu/umn/biomedicus/rtf/reader/RtfParserFactory.java +++ b/java/src/main/java/edu/umn/biomedicus/rtf/reader/RtfParserFactory.java @@ -16,20 +16,16 @@ package edu.umn.biomedicus.rtf.reader; -import edu.umn.biomedicus.rtf.beans.keywords.ControlKeywordsDescription; import edu.umn.biomedicus.rtf.beans.properties.PropertiesDescription; import java.util.Map; public class RtfParserFactory { - private final ControlKeywordsDescription controlKeywordsDescription; private final Map keywordActionMap; private final PropertiesDescription propertiesDescription; - public RtfParserFactory(ControlKeywordsDescription controlKeywordsDescription, - Map keywordActionMap, + public RtfParserFactory(Map keywordActionMap, PropertiesDescription propertiesDescription) { - this.controlKeywordsDescription = controlKeywordsDescription; this.keywordActionMap = keywordActionMap; this.propertiesDescription = propertiesDescription; } diff --git a/java/src/main/java/edu/umn/biomedicus/tagging/tnt/RocksDbDataStoreFactory.java b/java/src/main/java/edu/umn/biomedicus/tagging/tnt/RocksDbDataStoreFactory.java index 875f89db..3641930f 100644 --- a/java/src/main/java/edu/umn/biomedicus/tagging/tnt/RocksDbDataStoreFactory.java +++ b/java/src/main/java/edu/umn/biomedicus/tagging/tnt/RocksDbDataStoreFactory.java @@ -85,7 +85,8 @@ private String getSuffixesName(int id) { @Override public SuffixDataStore createSuffixDataStore(int id) { RocksDB.loadLibrary(); - try (Options options = new Options().setCreateIfMissing(true).prepareForBulkLoad()) { + try (Options options = new Options()) { + options.setCreateIfMissing(true).prepareForBulkLoad(); Files.createDirectories(dbPath); RocksDB rocksDB = RocksDB.open(options, dbPath.resolve(getSuffixesName(id)).toString()); rocksDBS.add(rocksDB); @@ -134,7 +135,8 @@ public KnownWordsDataStore openKnownWordDataStore(int id) { @Override public KnownWordsDataStore createKnownWordsDataStore(int id) { RocksDB.loadLibrary(); - try (Options options = new Options().setCreateIfMissing(true).prepareForBulkLoad()) { + try (Options options = new Options()) { + options.setCreateIfMissing(true).prepareForBulkLoad(); Files.createDirectories(dbPath); RocksDB rocksDB = RocksDB.open(options, dbPath.resolve(getWordsName(id)).toString()); rocksDBS.add(rocksDB); diff --git a/java/src/main/java/edu/umn/biomedicus/tagging/tnt/TntPosTaggerProcessor.java b/java/src/main/java/edu/umn/biomedicus/tagging/tnt/TntPosTaggerProcessor.java index 954eab97..d3c43379 100644 --- a/java/src/main/java/edu/umn/biomedicus/tagging/tnt/TntPosTaggerProcessor.java +++ b/java/src/main/java/edu/umn/biomedicus/tagging/tnt/TntPosTaggerProcessor.java @@ -163,7 +163,7 @@ public static void main(String[] args) { @Override protected void process(@NotNull Document document, @NotNull JsonObject params, - @NotNull JsonObjectBuilder result) { + @NotNull JsonObjectBuilder result) { String sentencesIndex = (String) params.getOrDefault("sentences_index", "sentences"); LabelIndex sentenceLabelIndex = document.getLabelIndex(sentencesIndex); String targetIndex = (String) params.getOrDefault("target_index", "pos_tags"); diff --git a/java/src/main/java/edu/umn/biomedicus/tagging/tnt/TntTrainerProcessor.java b/java/src/main/java/edu/umn/biomedicus/tagging/tnt/TntTrainerProcessor.java index fed71e2e..69b7b2c0 100644 --- a/java/src/main/java/edu/umn/biomedicus/tagging/tnt/TntTrainerProcessor.java +++ b/java/src/main/java/edu/umn/biomedicus/tagging/tnt/TntTrainerProcessor.java @@ -61,7 +61,7 @@ public TntTrainerProcessor(@NotNull Path outputDir, @NotNull DataStoreFactory da @Override protected void process(@NotNull Document document, @NotNull JsonObject params, - @NotNull JsonObjectBuilder result) { + @NotNull JsonObjectBuilder result) { LabelIndex sentences = document.getLabelIndex("sentences"); LabelIndex partsOfSpeech = document.getLabelIndex("pos_tags"); for (GenericLabel sentence : sentences) { diff --git a/java/src/main/java/edu/umn/biomedicus/utilities/PtbReaderProcessor.java b/java/src/main/java/edu/umn/biomedicus/utilities/PtbReaderProcessor.java index c7d1e663..e68d8538 100644 --- a/java/src/main/java/edu/umn/biomedicus/utilities/PtbReaderProcessor.java +++ b/java/src/main/java/edu/umn/biomedicus/utilities/PtbReaderProcessor.java @@ -39,7 +39,7 @@ public class PtbReaderProcessor extends EventProcessor { @Override public void process(@NotNull Event event, @NotNull JsonObject params, - @NotNull JsonObjectBuilder result) { + @NotNull JsonObjectBuilder result) { String sourceDocumentName = params.getStringValue("source_document_name"); if (sourceDocumentName == null) { sourceDocumentName = "source"; diff --git a/python/biomedicus/cli.py b/python/biomedicus/cli.py index 79725497..3798262e 100644 --- a/python/biomedicus/cli.py +++ b/python/biomedicus/cli.py @@ -22,7 +22,7 @@ rtf_to_text ) from biomedicus.java_support import RunJavaCommand -from biomedicus.pipeline_service import ServePipeline +from biomedicus.pipeline_service import ServePipeline, ServeRtfToText from biomedicus.utilities.print_all_processors_metadata import PrintProcessorMetaCommand from biomedicus_client import cli_tools from biomedicus_client.cli_tools import WriteConfigsCommand @@ -45,7 +45,8 @@ def main(args=None): DownloadDataCommand(), PrintProcessorMetaCommand(), rtf_to_text.DeployRtfToTextCommand(), - ServePipeline() + ServePipeline(), + ServeRtfToText() ) conf = parser.parse_args(args) diff --git a/python/biomedicus/deployment/confs/biomedicus_deploy.yml b/python/biomedicus/deployment/confs/biomedicus_deploy.yml index 8c13efdf..022e24f0 100644 --- a/python/biomedicus/deployment/confs/biomedicus_deploy.yml +++ b/python/biomedicus/deployment/confs/biomedicus_deploy.yml @@ -2,7 +2,6 @@ global: host: null mtap_config: null log_level: INFO - register: no events_service: enabled: yes address: 127.0.0.1:50100 diff --git a/python/biomedicus/deployment/confs/rtf_to_text_deploy.yml b/python/biomedicus/deployment/confs/rtf_to_text_deploy.yml index 9a827e01..ede3798e 100644 --- a/python/biomedicus/deployment/confs/rtf_to_text_deploy.yml +++ b/python/biomedicus/deployment/confs/rtf_to_text_deploy.yml @@ -2,7 +2,6 @@ global: host: null mtap_config: null log_level: INFO - register: no events_service: enabled: yes address: localhost:50100 diff --git a/python/biomedicus/deployment/confs/scaleout_deploy.yml b/python/biomedicus/deployment/confs/scaleout_deploy.yml index 6af2c34c..ff39a144 100644 --- a/python/biomedicus/deployment/confs/scaleout_deploy.yml +++ b/python/biomedicus/deployment/confs/scaleout_deploy.yml @@ -2,7 +2,6 @@ global: host: null mtap_config: null log_level: INFO - register: no events_service: enabled: yes addresses: diff --git a/python/biomedicus/pipeline_service.py b/python/biomedicus/pipeline_service.py index fa89b551..cea108ab 100644 --- a/python/biomedicus/pipeline_service.py +++ b/python/biomedicus/pipeline_service.py @@ -17,7 +17,7 @@ from mtap.pipeline import pipeline_parser, run_pipeline_server -from biomedicus_client import default_pipeline +from biomedicus_client import default_pipeline, rtf_to_text from biomedicus_client.cli_tools import Command @@ -61,3 +61,31 @@ def command_fn(self, conf): conf.serializer = None pipeline = default_pipeline.from_args(conf) run_pipeline_server(pipeline, conf) + + +class ServeRtfToText(Command): + @property + def command(self) -> str: + return "serve-rtf-to-text" + + @property + def help(self) -> str: + return "Starts the RTF to text BioMedICUS pipeline service." + + @property + def parents(self) -> List[ArgumentParser]: + return [pipeline_parser()] + + def add_arguments(self, parser: ArgumentParser): + parser.add_argument( + '--config', + default=None, + help='Path to the pipeline configuration file.' + ) + + def command_fn(self, conf): + if conf.port == 0: + conf.port = 55001 + conf.serializer = None + pipeline = rtf_to_text.from_args(conf) + run_pipeline_server(pipeline, conf) diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 78ef4484..f193ac97 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -87,6 +87,8 @@ def fixture_events_service(): def _listen(process: subprocess.Popen): + if process.stdout is None: + return for line in process.stdout: print(line.decode(), end='') return process.wait() diff --git a/python/tests/deployment/test_rest_e2e.py b/python/tests/deployment/test_rest_e2e.py index 2be9b3dd..080a5d03 100644 --- a/python/tests/deployment/test_rest_e2e.py +++ b/python/tests/deployment/test_rest_e2e.py @@ -31,15 +31,25 @@ from yaml import Dumper +text = (Path(__file__).parent / 'rtf_in' / '97_204.rtf').read_bytes() + + @pytest.fixture(name='hosted_pipeline') def fixture_hosted_pipeline(deploy_all, processor_watcher): port = find_free_port() p = Popen([sys.executable, '-m', 'biomedicus', 'serve-pipeline', '--port', str(port), '--rtf']) yield from processor_watcher(f'127.0.0.1:{port}', p) + +@pytest.fixture(name='rtf_to_text_pipeline') +def fixture_rtf_to_text_pipeline(deploy_all, processor_watcher): + port = find_free_port() + p = Popen([sys.executable, '-m', 'biomedicus', 'serve-rtf-to-text', '--port', str(port)]) + yield from processor_watcher(f'127.0.0.1:{port}', p) + @pytest.fixture(name='mtap_gateway') -def fixture_mtap_gateway(hosted_pipeline): +def fixture_mtap_gateway(hosted_pipeline, rtf_to_text_pipeline): port = find_free_port() config = { 'discovery': 'consul', @@ -64,6 +74,10 @@ def fixture_mtap_gateway(hosted_pipeline): { 'Identifier': 'biomedicus-default-pipeline', 'Endpoint': hosted_pipeline + }, + { + 'Identifier': 'biomedicus-rtf-to-text', + 'Endpoint': rtf_to_text_pipeline } ] } @@ -115,8 +129,6 @@ def test_rest_e2e(mtap_gateway): session.trust_env = False base_url = "http://" + mtap_gateway - text = (Path(__file__).parent / 'rtf_in' / '97_204.rtf').read_bytes() - body = { 'event': { 'event_id': '1.txt', @@ -137,3 +149,30 @@ def test_rest_e2e(mtap_gateway): resp_body = resp.json() label_indices = resp_body['event']['documents']['plaintext']['label_indices'] assert len(label_indices) > 0 + + +@pytest.mark.integration +def test_rest_rtf(mtap_gateway): + session = requests.Session() + session.trust_env = False + base_url = "http://" + mtap_gateway + + body = { + 'event': { + 'event_id': '1.txt', + 'binaries': { + 'rtf': base64.standard_b64encode(text).decode('utf-8') + } + }, + 'params': { + 'document_name': 'plaintext', + } + } + resp = session.post( + base_url + '/v1/pipeline/biomedicus-rtf-to-text/process', + json=body, + timeout=10 + ) + assert resp.status_code == 200 + resp_body = resp.json() + assert len(resp_body['event']['documents']['plaintext']['text']) > 0 diff --git a/tools/docker/gateway.yml b/tools/docker/gateway.yml index 44fcde38..296d8f54 100644 --- a/tools/docker/gateway.yml +++ b/tools/docker/gateway.yml @@ -5,3 +5,5 @@ gateway: pipelines: - Identifier: biomedicus-default-pipeline Endpoint: 127.0.0.1:55000 + - Identifier: biomedicus-rtf-to-text + Endpoint: 127.0.0.1:55001 diff --git a/tools/docker/rest_e2e.sh b/tools/docker/rest_e2e.sh index 9b2d2a57..0e547c74 100644 --- a/tools/docker/rest_e2e.sh +++ b/tools/docker/rest_e2e.sh @@ -4,4 +4,5 @@ b9 deploy --rtf --host 0.0.0.0 > services.log & tail -f -n0 services.log | grep -qe "Done deploying all servers." b9 serve-pipeline --rtf --include-label-text -p 55000 > serve-pipeline.log & +b9 serve-rtf-to-text -p 55001 > serve-rtf-to-text.log & MTAP_CONFIG=gateway.yml mtap-gateway -v=3 -logtostderr