Skip to content

Commit

Permalink
Adding REST rtf only pipeline, cleaning up Java warnings, adjusting t…
Browse files Browse the repository at this point in the history
…o MTAP changes
  • Loading branch information
benknoll-umn committed Oct 3, 2024
1 parent 2106259 commit df3954c
Show file tree
Hide file tree
Showing 36 changed files with 161 additions and 97 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -385,3 +385,6 @@ gradle-app.setting
Pipfile
Pipfile.lock
test_results.yml

# Visual Studio Code
.vscode
5 changes: 1 addition & 4 deletions biomedicus_client/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,7 @@ classifiers = [
'Topic :: Text Processing :: Linguistic'
]
dependencies = [
"mtap>=1.4.2",
"grpcio==1.66.1",
"grpcio-health-checking==1.66.1",
"grpcio-status==1.66.1",
"mtap>1.4.2",
"tqdm==4.66.5",
"importlib_resources==6.4.0",
"pyyaml==6.0.2"
Expand Down
2 changes: 1 addition & 1 deletion java/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ repositories {
dependencies {
implementation group: 'org.jetbrains', name: 'annotations', version: '24.1.0'

implementation group: 'edu.umn.nlpie', name: 'mtap', version: '1.4.0'
implementation group: 'edu.umn.nlpie', name: 'mtap', version: '1.4.3-SNAPSHOT'

implementation group: 'org.slf4j', name: 'slf4j-api', version: '2.0.16'
implementation group: 'args4j', name: 'args4j', version: '2.37'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import edu.umn.biomedicus.common.tokenization.WhitespaceTokenizer;
import edu.umn.biomedicus.common.tuples.Pair;
import edu.umn.biomedicus.serialization.YamlSerialization;
import edu.umn.nlpie.mtap.MTAP;
import edu.umn.nlpie.mtap.common.JsonObject;
import edu.umn.nlpie.mtap.common.JsonObjectBuilder;
import edu.umn.nlpie.mtap.model.*;
Expand Down Expand Up @@ -277,7 +276,7 @@ void writeToDirectory(Path outputDir,
protected void process(
@NotNull Document document,
@NotNull JsonObject params,
@NotNull JsonObjectBuilder result
@NotNull JsonObjectBuilder<?, ?> result
) {
LOGGER.debug("Detecting acronyms in a document.");
Boolean labelOtherSenses = params.getBooleanValue("label_other_senses");
Expand Down Expand Up @@ -320,6 +319,15 @@ protected void process(
}
}

/**
 * Closes the sense vectors held by this processor when it is shut down.
 *
 * <p>An {@link IOException} raised while closing is logged at error level and is not rethrown.
 */
@Override
public void shutdown() {
    try {
        senseVectors.close();
    } catch (IOException closeFailure) {
        LOGGER.error("Error closing sense vectors dictionary", closeFailure);
    }
}

private boolean allExcluded(List<GenericLabel> posTags) {
return posTags.stream()
.map(tagLabel -> PartsOfSpeech.forTag(tagLabel.getStringValue("tag")))
Expand Down Expand Up @@ -480,6 +488,7 @@ public AcronymDetectorProcessor build() throws IOException {
LOGGER.info("Loading acronym vector space: {}", vectorSpace);
WordVectorSpace wordVectorSpace = WordVectorSpace.load(vectorSpace);
LOGGER.info("Loading acronym sense map: {}. inMemory = {}", senseMap, sensesInMemory);
@SuppressWarnings("resource") // This is closed when the processor is shut down.
SenseVectors senseVectors = new RocksDBSenseVectors(senseMap, false)
.inMemory(sensesInMemory);
AlignmentModel alignment = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ class AlignmentModel {
private static final Logger LOGGER = LoggerFactory.getLogger(AlignmentModel.class);
private List<String> longforms;
private boolean caseSensitive;
private Map<String, Double> cachedScores = new HashMap<>();

private AlignmentModel(List<String> longforms, boolean caseSensitive) {
this.longforms = longforms;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ public class OrthographicAcronymModelTrainer {
private static final double discounting = .9;
private final boolean caseSensitive;
private final int[] symbols;
private final int[] chars;
private final double[][][] longformProbs;
private final double[][][] abbrevProbs;
/**
Expand All @@ -50,8 +49,6 @@ public OrthographicAcronymModelTrainer(boolean caseSensitive) {
this.caseSensitive = caseSensitive;
symbols = caseSensitive ? OrthographicAcronymModel.CASE_SENS_SYMBOLS
: OrthographicAcronymModel.CASE_INSENS_SYMBOLS;
chars = caseSensitive ? OrthographicAcronymModel.CASE_SENS_CHARS
: OrthographicAcronymModel.CASE_INSENS_CHARS;
longformProbs = new double[symbols.length][symbols.length][symbols.length];
abbrevProbs = new double[symbols.length][symbols.length][symbols.length];
longformsLower = new HashSet<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ public RocksDBSenseVectors(Path path, boolean forWriting) {
RocksDB.loadLibrary();

if (forWriting) {
try (Options options = new Options().setCreateIfMissing(true).prepareForBulkLoad()) {
try (Options options = new Options()) {
options.setCreateIfMissing(true);
options.prepareForBulkLoad();
rocksDB = RocksDB.open(options, path.toString());
} catch (RocksDBException e) {
throw new RuntimeException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ public class WordVectorSpace {
* Default weighting function is sigmoid that decreases with distance (to 0.5 at maxDist)
* Need to cast to Serializable to save it
*/
@SuppressWarnings("unchecked")
private static final BiFunction<Integer, Double, Double> DIST_WEIGHT = (BiFunction<Integer, Double, Double> & Serializable) (dist, maxDist) ->
1.0 / (1.0 + Math.exp(SLOPE * (Math.abs(dist) - maxDist)));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,11 @@ private static List<String> loadUnitsList() {
}
}
InputStream is = Tokenizer.class.getResourceAsStream("unitsList.txt");
return new BufferedReader(new InputStreamReader(is)).lines().collect(Collectors.toList());
try (BufferedReader reader = new BufferedReader(new InputStreamReader(is))) {
return reader.lines().collect(Collectors.toList());
} catch (IOException e) {
throw new IllegalStateException("Failed to load units list.", e);
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import org.jetbrains.annotations.Nullable;
import java.io.Serializable;
import java.util.regex.Pattern;

/**
* Represents a pair of word and isCapitalized.
Expand All @@ -33,11 +32,6 @@ public class WordCap implements Comparable<WordCap>, Serializable {
*/
private static final long serialVersionUID = -5981094615088473604L;

/**
* Pattern which matches digits.
*/
private static final Pattern DIGITS = Pattern.compile("\\d");

/**
* The word in the word-capitalization.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,15 @@

package edu.umn.biomedicus.common.utilities;

import org.rocksdb.DBOptions;
import org.rocksdb.InfoLogLevel;
import org.rocksdb.Logger;
import org.rocksdb.Options;

public class RocksToSLF4JLogger extends Logger {

private final org.slf4j.Logger slf4jLogger;

public RocksToSLF4JLogger(Options options, org.slf4j.Logger slf4jLogger) {
super(options);
this.slf4jLogger = slf4jLogger;
}

public RocksToSLF4JLogger(DBOptions dboptions, org.slf4j.Logger slf4jLogger) {
super(dboptions);
public RocksToSLF4JLogger(InfoLogLevel logLevel, org.slf4j.Logger slf4jLogger) {
super(logLevel);
this.slf4jLogger = slf4jLogger;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,9 @@ private void doWork() throws IOException {
}
}
}
try (Options options = new Options().setCreateIfMissing(true).prepareForBulkLoad()) {
try (Options options = new Options()) {
options.setCreateIfMissing(true);
options.prepareForBulkLoad();
try (RocksDB phrases = RocksDB.open(options, dbPath.resolve("phrases").toString());
RocksDB lowercase = RocksDB.open(options, dbPath.resolve("lowercase").toString())) {
int wrote = 0;
Expand Down Expand Up @@ -306,8 +308,10 @@ private void doWork() throws IOException {
}

int wrote = 0;
try (Options options = new Options().setCreateIfMissing(true).prepareForBulkLoad();
try (Options options = new Options();
RocksDB normsDb = RocksDB.open(options, dbPath.resolve("norms").toString())) {
options.setCreateIfMissing(true);
options.prepareForBulkLoad();
for (Entry<String, List<ConceptRow>> entry : map.entrySet()) {
List<ConceptRow> suiCuiTuis = entry.getValue();
byte[] suiCuiTuiBytes = getBytes(suiCuiTuis);
Expand Down
47 changes: 21 additions & 26 deletions java/src/main/java/edu/umn/biomedicus/concepts/ConceptsUtility.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

package edu.umn.biomedicus.concepts;

import edu.umn.biomedicus.common.config.Config;
import org.jetbrains.annotations.Nullable;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
Expand All @@ -33,41 +32,37 @@
* Utility
*/
public class ConceptsUtility {
@Option(
name = "--db-path",
metaVar = "PATH_TO",
usage = "Optional override path to the concepts dictionary."
)
@Option(name = "--db-path", metaVar = "PATH_TO", usage = "Optional override path to the concepts dictionary.")
private @Nullable Path dbPath = null;

public void listenToConsole() throws IOException, RocksDBException {
Scanner scanner = new Scanner(System.in);
System.out.println("Reading concepts from database");
try (Scanner scanner = new Scanner(System.in)) {
System.out.println("Reading concepts from database");

DictionaryConceptDetector.ConceptsOptions conceptsOptions = new DictionaryConceptDetector.ConceptsOptions();
conceptsOptions.setDbPath(dbPath);
conceptsOptions.setInMemory(true);
ConceptDictionary dictionary = DictionaryConceptDetector.loadConceptsDictionary(conceptsOptions);
DictionaryConceptDetector.ConceptsOptions conceptsOptions = new DictionaryConceptDetector.ConceptsOptions();
conceptsOptions.setDbPath(dbPath);
conceptsOptions.setInMemory(true);
ConceptDictionary dictionary = DictionaryConceptDetector.loadConceptsDictionary(conceptsOptions);

while (true) {
System.out.print("Q: ");
String query = scanner.nextLine();
if ("!q".equals(query)) {
return;
} else if (CUI_PATTERN.matcher(query).matches()) {
for (PhraseConcept phraseConcept : dictionary.withCui(new CUI(query))) {
System.out.println(phraseConcept.toString());
}
} else {
System.out.println("Searching for " + query);
for (PhraseConcept phraseConcept : dictionary.withWord(query)) {
System.out.println(phraseConcept.toString());
while (true) {
System.out.print("Q: ");
String query = scanner.nextLine();
if ("!q".equals(query)) {
return;
} else if (CUI_PATTERN.matcher(query).matches()) {
for (PhraseConcept phraseConcept : dictionary.withCui(new CUI(query))) {
System.out.println(phraseConcept.toString());
}
} else {
System.out.println("Searching for " + query);
for (PhraseConcept phraseConcept : dictionary.withWord(query)) {
System.out.println(phraseConcept.toString());
}
}
}
}
}


public static void main(String[] args) {
ConceptsUtility conceptsUtility = new ConceptsUtility();
CmdLineParser parser = new CmdLineParser(conceptsUtility);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ public static void main(String[] args) {
protected void process(
@NotNull Document document,
@NotNull JsonObject params,
@NotNull JsonObjectBuilder result
@NotNull JsonObjectBuilder<?, ?> result
) {
LOGGER.debug("Finding concepts in document.");

Expand Down Expand Up @@ -362,8 +362,10 @@ public void run() {
sentenceNorms.add(norm);
}
} else {
for (GenericLabel genericLabel : norms.inside(sentence)) {
sentenceNorms.add(genericLabel.getStringValue("norm"));
if (norms != null) {
for (GenericLabel genericLabel : norms.inside(sentence)) {
sentenceNorms.add(genericLabel.getStringValue("norm"));
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.umn.biomedicus.common.utilities.RocksToSLF4JLogger;

import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
Expand Down Expand Up @@ -72,7 +74,9 @@ static List<ConceptRow> toList(byte[] bytes) {
public static ConceptDictionary loadModel(Path dbPath, boolean inMemory) throws RocksDBException, IOException {
RocksDB.loadLibrary();

try (Options options = new Options().setInfoLogLevel(InfoLogLevel.ERROR_LEVEL)) {
try (Options options = new Options()) {
options.setInfoLogLevel(InfoLogLevel.ERROR_LEVEL);
options.setLogger(new RocksToSLF4JLogger(InfoLogLevel.ERROR_LEVEL, LOGGER));
LOGGER.info("Opening concepts dictionary: {}. inMemory = {}.", dbPath, inMemory);

RocksDB phrasesDB = RocksDB.openReadOnly(options, dbPath.resolve("phrases").toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ public static void main(String[] args) {
}

@Override
protected void process(@NotNull Document document, @NotNull JsonObject params, @NotNull JsonObjectBuilder result) {
protected void process(@NotNull Document document, @NotNull JsonObject params, @NotNull JsonObjectBuilder<?, ?> result) {
String termIndexName = params.getStringValue("terms_index");
if (termIndexName == null) {
termIndexName = "umls_terms";
Expand Down Expand Up @@ -249,7 +249,7 @@ protected void process(@NotNull Document document, @NotNull JsonObject params, @
Pair<ModificationType, List<GenericLabel>> searchResult = CUES.searchLeft(contextList);

if (searchResult != null) {
List<GenericLabel> cues = searchResult.second().stream().map(span -> {
searchResult.second().stream().map(span -> {
GenericLabel cue = GenericLabel.withSpan(span).build();
cueLabeler.add(cue);
return cue;
Expand All @@ -274,7 +274,7 @@ protected void process(@NotNull Document document, @NotNull JsonObject params, @

searchResult = CUES.searchRight(contextList);
if (searchResult != null) {
List<GenericLabel> cues = searchResult.second().stream().map(span -> {
searchResult.second().stream().map(span -> {
GenericLabel cue = GenericLabel.withSpan(span).build();
cueLabeler.add(cue);
return cue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ public static void main(String[] args) {
protected void process(
@NotNull Document document,
@NotNull JsonObject params,
@NotNull JsonObjectBuilder result
@NotNull JsonObjectBuilder<?, ?> result
) {
LOGGER.debug("Normalizing tokens in a document.");
LabelIndex<GenericLabel> posTagIndex = document.getLabelIndex("pos_tags");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,10 @@
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.spi.PathOptionHandler;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.file.Files;
Expand Down Expand Up @@ -64,8 +61,6 @@ public final class NormalizerModelBuilder {
*/
public static final int LRAGR_BASE_FORM = 4;

private static final Logger LOGGER = LoggerFactory.getLogger(NormalizerModelBuilder.class);

private static final int IGNORE_WHEN_LONGER = 100;

private static final Map<LragrPos, PartOfSpeech> LRAGR_TO_PENN;
Expand Down Expand Up @@ -191,7 +186,8 @@ public void doWork() throws IOException {

System.out.println("Creating normalizer db from " + builder.size() + " terms");

try (Options options = new Options().setCreateIfMissing(true).prepareForBulkLoad()) {
try (Options options = new Options()) {
options.setCreateIfMissing(true).prepareForBulkLoad();
try (RocksDB rocksDB = RocksDB.open(options, dbPath.toString())) {
builder.forEach((tp, ts) -> {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ public static RocksDBNormalizerModel loadModel(Path dbPath) {
RocksDBNormalizerModel(Path dbPath) {
RocksDB.loadLibrary();

try (Options options = new Options().setInfoLogLevel(InfoLogLevel.ERROR_LEVEL)) {
try (Options options = new Options()) {
options.setInfoLogLevel(InfoLogLevel.ERROR_LEVEL);
db = RocksDB.openReadOnly(options, dbPath.toString());
} catch (RocksDBException e) {
throw new RuntimeException(e);
Expand Down
3 changes: 1 addition & 2 deletions java/src/main/java/edu/umn/biomedicus/rtf/RTF.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ public static RtfParserFactory getFactory() throws IOException {
.getKeywordActionsAsMap();
PropertiesDescription propertiesDescription = PropertiesDescription
.loadFromFile("edu/umn/biomedicus/rtf/PropertiesDescription.xml");
return new RtfParserFactory(controlKeywordsDescription, keywordActionMap,
propertiesDescription);
return new RtfParserFactory(keywordActionMap, propertiesDescription);
}
}
Loading

0 comments on commit df3954c

Please sign in to comment.