Merge #450 from branch 'useFixInsteadOfMorph' of https://github.com/h…
dr0i committed Sep 18, 2023
2 parents 73b476e + 67c96c1 commit 4ab6a99
Showing 49 changed files with 102,615 additions and 2,515 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/build.yml
@@ -12,5 +12,17 @@ jobs:
       uses: actions/setup-java@v1
       with:
         java-version: 1.8
+    - name: Install metafacture-core
+      run: |
+        git clone https://github.com/metafacture/metafacture-core.git
+        cd metafacture-core
+        git checkout 5.7.0-rc1
+        ./gradlew publishToMavenLocal
+    - name: Install metafacture-fix
+      run: |
+        git clone https://github.com/metafacture/metafacture-fix.git
+        cd metafacture-fix
+        git checkout master
+        ./gradlew publishToMavenLocal
     - name: Run tests
       run: sbt update test
2 changes: 1 addition & 1 deletion .gitignore
@@ -23,7 +23,7 @@ data/*
 *.txt
 app/transformation/input/*.xml
 app/transformation/input/*.csv
-test/transformation/output/*
+.cache*
 /bin/
 application-log*.gz
 app/transformation/input/*.dat
6 changes: 3 additions & 3 deletions app/controllers/Accept.java
@@ -28,10 +28,10 @@ enum Format {
   N_TRIPLE("nt", "application/n-triples", "text/plain"), //
   TURTLE("ttl", "text/turtle", "application/x-turtle");
 
-  String[] types;
-  String queryParamString;
+  final String[] types;
+  final String queryParamString;
 
-  private Format(String format, String... types) {
+  Format(String format, String... types) {
     this.queryParamString = format;
     this.types = types;
   }
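
The constructor hunk works because Java enum constructors are implicitly private, so the explicit `private` modifier was redundant; marking the fields `final` documents that they are assigned exactly once, in the constructor. A self-contained sketch with a hypothetical enum of the same shape, not the project's class:

    public class EnumConstructorDemo {

        enum Format {
            JSON("json", "application/json"),
            TURTLE("ttl", "text/turtle");

            final String queryParamString; // final: assigned once, in the constructor
            final String[] types;

            // No access modifier needed; enum constructors are always private.
            Format(String format, String... types) {
                this.queryParamString = format;
                this.types = types;
            }
        }

        public static void main(String[] args) {
            System.out.println(Format.TURTLE.queryParamString); // prints "ttl"
        }
    }
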
11 changes: 6 additions & 5 deletions app/controllers/Application.java
@@ -33,6 +33,7 @@
 import org.elasticsearch.index.query.GeoPolygonQueryBuilder;
 import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.search.sort.SortParseElement;
 
@@ -510,7 +511,7 @@ private static String defaultFields() {
 
   private static String searchQueryResult(String q, String location, int from,
       int size, String aggregations) {
-    String result = null;
+    String result;
     if (location == null || location.isEmpty()) {
       result = buildSimpleQuery(q, from, size, aggregations);
     } else {
@@ -619,8 +620,8 @@ static String[] defaultAggregations() {
 
   private static String returnAsJson(SearchResponse queryResponse) {
     List<Map<String, Object>> hits =
-        Arrays.asList(queryResponse.getHits().hits()).stream()
-            .map(hit -> hit.getSource()).collect(Collectors.toList());
+        Arrays.stream(queryResponse.getHits().hits())
+            .map(SearchHit::getSource).collect(Collectors.toList());
     ObjectNode object = Json.newObject();
     object.put("@context",
         CONFIG.getString("host") + routes.Application.context());
@@ -714,8 +715,8 @@ private static Result resultFor(String id, JsonNode json, String format) {
 
   private static Pair<String, String> contentAndType(JsonNode responseJson,
       String responseFormat) {
-    String content = "";
-    String contentType = "";
+    String content;
+    String contentType;
     switch (responseFormat) {
     case "rdf": {
       content = RdfConverter.toRdf(responseJson.toString(), RdfFormat.RDF_XML);
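
Two of the hunks above swap `Arrays.asList(array).stream()` for the more direct `Arrays.stream(array)` and replace the lambda `hit -> hit.getSource()` with a method reference (hence the new `SearchHit` import). A minimal sketch of the idiom with plain strings, outside the Elasticsearch types:

    import java.util.Arrays;
    import java.util.List;
    import java.util.stream.Collectors;

    public class StreamIdiomDemo {
        public static void main(String[] args) {
            String[] hits = { "alpha", "beta", "gamma" };

            // Before: Arrays.asList(hits).stream().map(h -> h.toUpperCase())...
            // After: stream the array directly and use a method reference.
            List<String> sources = Arrays.stream(hits)
                    .map(String::toUpperCase)
                    .collect(Collectors.toList());

            System.out.println(sources); // [ALPHA, BETA, GAMMA]
        }
    }
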
36 changes: 16 additions & 20 deletions app/controllers/Index.java
@@ -71,14 +71,14 @@ public ConfigurableNode(Settings settings,
     }
   }
 
-  private static Settings clientSettings =
+  private final static Settings clientSettings =
       Settings.settingsBuilder().put("path.home", ".")
           .put("http.port", Application.CONFIG.getString("index.es.port.http"))
           .put("transport.tcp.port",
               Application.CONFIG.getString("index.es.port.tcp"))
           .put("script.default_lang", "native").build();
 
-  private static Node node = new ConfigurableNode(
+  private final static Node node = new ConfigurableNode(
       nodeBuilder().settings(clientSettings).local(true).getSettings().build(),
       Arrays.asList(BundlePlugin.class, LocationAggregation.class, Zero.class))
           .start();
@@ -116,7 +116,7 @@ public static void initialize(String pathToJson) throws IOException, Elasticsear
     long minimumSize =
         Long.parseLong(Application.CONFIG.getString("index.file.minsize"));
     if (new File(pathToJson).length() >= minimumSize) {
-      createEmptyIndex(CLIENT, INDEX_NAME, "conf/index-settings.json");
+      createEmptyIndex();
       indexData(CLIENT, pathToJson, INDEX_NAME);
     }
     else {
@@ -189,25 +189,21 @@ private static SearchRequestBuilder withAggregations(
     return searchRequest;
   }
 
-  static void createEmptyIndex(final Client aClient, final String aIndexName,
-      final String aPathToIndexSettings) throws IOException {
-    deleteIndex(aClient, aIndexName);
+  static void createEmptyIndex() throws IOException {
+    deleteIndex(Index.CLIENT, Index.INDEX_NAME);
     CreateIndexRequestBuilder cirb =
-        aClient.admin().indices().prepareCreate(aIndexName);
-    if (aPathToIndexSettings != null) {
-      String settingsMappings = Files.lines(Paths.get(aPathToIndexSettings))
-          .collect(Collectors.joining());
-      cirb.setSource(settingsMappings);
-    }
+        Index.CLIENT.admin().indices().prepareCreate(Index.INDEX_NAME);
+    String settingsMappings = Files.lines(Paths.get("conf/index-settings.json"))
+        .collect(Collectors.joining());
+    cirb.setSource(settingsMappings);
     cirb.execute().actionGet();
-    aClient.admin().indices().refresh(new RefreshRequest()).actionGet();
+    Index.CLIENT.admin().indices().refresh(new RefreshRequest()).actionGet();
   }
 
-  static void indexData(final Client aClient, final String aPath,
-      final String aIndex) throws IOException, ElasticsearchException {
-    final BulkRequestBuilder bulkRequest = aClient.prepareBulk();
+  static void indexData(final Client aClient, final String aPath, final String aIndex) throws IOException, ElasticsearchException {
+    final BulkRequestBuilder bulkRequest = Index.CLIENT.prepareBulk();
     try (BufferedReader br =
-        new BufferedReader(new InputStreamReader(new FileInputStream(aPath),
+        new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(aPath)),
             StandardCharsets.UTF_8))) {
       readData(bulkRequest, br, aClient, aIndex);
     }
@@ -224,8 +220,8 @@ private static void readData(final BulkRequestBuilder bulkRequest,
     final ObjectMapper mapper = new ObjectMapper();
     String line;
     int currentLine = 1;
-    String organisationData = null;
-    String[] idUriParts = null;
+    String organisationData;
+    String[] idUriParts;
     String organisationId = null;
 
     // First line: index with id, second line: source
@@ -237,7 +233,7 @@
         organisationId = idUriParts[idUriParts.length - 1].replace("#!", "");
       } else {
         organisationData = line;
-        bulkRequest.add(client.prepareIndex(aIndex, INDEX_TYPE, organisationId)
+        bulkRequest.add(Index.CLIENT.prepareIndex(Index.INDEX_NAME, INDEX_TYPE, organisationId)
             .setSource(organisationData));
       }
       currentLine++;
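
The `indexData` hunk replaces `new FileInputStream(aPath)` with `Files.newInputStream(Paths.get(aPath))`; the NIO factory reports missing files with the more precise `NoSuchFileException` and is the idiomatic partner of try-with-resources. A standalone sketch of the reading pattern (the file name `data.jsonl` is illustrative):

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    public class NewInputStreamDemo {
        public static void main(String[] args) throws IOException {
            // Hypothetical input file; the commit reads bulk JSON lines the same way.
            String path = "data.jsonl";

            // NIO stream + explicit UTF-8, as in the refactored indexData(...).
            try (BufferedReader br = new BufferedReader(new InputStreamReader(
                    Files.newInputStream(Paths.get(path)), StandardCharsets.UTF_8))) {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            }
        }
    }
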
30 changes: 15 additions & 15 deletions app/controllers/RdfConverter.java
@@ -2,20 +2,18 @@
 
 package controllers;
 
-import java.io.IOException;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
 import java.io.StringWriter;
 
-import com.github.jsonldjava.core.JsonLdError;
-import com.github.jsonldjava.core.JsonLdProcessor;
-import com.github.jsonldjava.jena.JenaTripleCallback;
-import com.github.jsonldjava.utils.JsonUtils;
-import com.hp.hpl.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
 
 import play.Logger;
 
 /**
  * Helper class for converting JsonLd to RDF.
- *
+ *
  * @author Fabian Steeg (fsteeg)
  *
  */
@@ -24,7 +22,7 @@ public class RdfConverter {
  * RDF serialization formats.
  */
 @SuppressWarnings("javadoc")
-public static enum RdfFormat {
+public enum RdfFormat {
   RDF_XML("RDF/XML"), //
   N_TRIPLE("N-TRIPLE"), //
   TURTLE("TURTLE");
@@ -47,29 +45,31 @@ public String getName() {
   */
  public static String toRdf(final String jsonLd, final RdfFormat format) {
    try {
-      final Object jsonObject = JsonUtils.fromString(jsonLd);
-      final JenaTripleCallback callback = new JenaTripleCallback();
-      final Model model = (Model) JsonLdProcessor.toRDF(jsonObject, callback);
+      // convert the JSON-LD string into an InputStream, as required by read()
+      final InputStream targetStream = new ByteArrayInputStream(jsonLd.getBytes());
+      final Model model = ModelFactory.createDefaultModel();
+
+      model.read(targetStream, "", "JSON-LD");
       model.setNsPrefix("bf", "http://id.loc.gov/ontologies/bibframe/");
       model.setNsPrefix("bibo", "http://purl.org/ontology/bibo/");
       model.setNsPrefix("dc", "http://purl.org/dc/elements/1.1/");
       model.setNsPrefix("dcterms", "http://purl.org/dc/terms/");
-      model.setNsPrefix("gndo", "http://d-nb.info/standards/elementset/gnd#");
+      model.setNsPrefix("gndo", "https://d-nb.info/standards/elementset/gnd#");
       model.setNsPrefix("lv", "http://purl.org/lobid/lv#");
       model.setNsPrefix("mo", "http://purl.org/ontology/mo/");
       model.setNsPrefix("org", "http://www.w3.org/ns/org#");
       model.setNsPrefix("owl", "http://www.w3.org/2002/07/owl#");
       model.setNsPrefix("rdau", "http://rdaregistry.info/Elements/u/");
       model.setNsPrefix("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
       model.setNsPrefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
-      model.setNsPrefix("schema", "https://schema.org/");
+      model.setNsPrefix("schema", "http://schema.org/");
       model.setNsPrefix("skos", "http://www.w3.org/2004/02/skos/core#");
       model.setNsPrefix("wdrs", "http://www.w3.org/2007/05/powder-s#");
       model.setNsPrefix("xsd", "http://www.w3.org/2001/XMLSchema#");
 
       final StringWriter writer = new StringWriter();
       model.write(writer, format.getName());
       return writer.toString();
-    } catch (IOException | JsonLdError e) {
+    } catch (Exception e) {
       Logger.error(e.getMessage(), e);
     }
     return null;
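
The rewritten `toRdf` is the core of this file's change: instead of materializing the JSON-LD through jsonld-java's `JenaTripleCallback`, it hands the raw string to Jena, which parses JSON-LD itself via `Model.read(InputStream, base, lang)`. A minimal sketch of that round trip, assuming a current Apache Jena with its JSON-LD reader (e.g. jena-arq) on the classpath:

    import java.io.ByteArrayInputStream;
    import java.io.InputStream;
    import java.io.StringWriter;
    import java.nio.charset.StandardCharsets;

    import org.apache.jena.rdf.model.Model;
    import org.apache.jena.rdf.model.ModelFactory;

    public class JsonLdToRdfDemo {
        public static void main(String[] args) {
            String jsonLd = "{ \"@id\": \"http://example.org/x\", "
                    + "\"http://schema.org/name\": \"Example\" }";

            // Jena reads JSON-LD directly; no JenaTripleCallback needed.
            Model model = ModelFactory.createDefaultModel();
            InputStream in = new ByteArrayInputStream(jsonLd.getBytes(StandardCharsets.UTF_8));
            model.read(in, "", "JSON-LD");
            model.setNsPrefix("schema", "http://schema.org/");

            StringWriter writer = new StringWriter();
            model.write(writer, "TURTLE");
            System.out.println(writer);
        }
    }
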
5 changes: 2 additions & 3 deletions app/controllers/Reconcile.java
@@ -79,7 +79,7 @@ public static Result reconcile() {
 
   private static List<JsonNode> mapToResults(String mainQuery,
       SearchHits searchHits) {
-    return Arrays.asList(searchHits.getHits()).stream().map(hit -> {
+    return Arrays.stream(searchHits.getHits()).map(hit -> {
       Map<String, Object> map = hit.getSource();
       ObjectNode resultForHit = Json.newObject();
       resultForHit.put("id", hit.getId());
@@ -101,8 +101,7 @@ private static SearchResponse executeQuery(Entry<String, JsonNode> entry,
         QueryBuilders.simpleQueryStringQuery(queryString);
     BoolQueryBuilder boolQuery = QueryBuilders.boolQuery().must(stringQuery)
         .must(QueryBuilders.existsQuery("type"));
-    SearchResponse response = Index.executeQuery(0, limit, boolQuery, "");
-    return response;
+    return Index.executeQuery(0, limit, boolQuery, "");
   }
 
   private static String buildQueryString(Entry<String, JsonNode> entry) {
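
`executeQuery` now returns the result of `Index.executeQuery(...)` directly instead of going through a temporary variable. The query it sends combines a simple-query-string match with an exists check; a sketch of that combination, assuming the Elasticsearch 2.x client API this codebase uses (the query text is illustrative):

    import org.elasticsearch.index.query.BoolQueryBuilder;
    import org.elasticsearch.index.query.QueryBuilders;

    public class BoolQueryDemo {
        public static void main(String[] args) {
            // Match the user's query string, but only hits that have a "type" field,
            // mirroring the boolQuery built in Reconcile.executeQuery(...).
            BoolQueryBuilder boolQuery = QueryBuilders.boolQuery()
                    .must(QueryBuilders.simpleQueryStringQuery("library köln"))
                    .must(QueryBuilders.existsQuery("type"));

            System.out.println(boolQuery); // prints the query as JSON
        }
    }
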
13 changes: 7 additions & 6 deletions app/transformation/CsvExport.java
@@ -21,7 +21,7 @@
  */
 public class CsvExport {
 
-  private JsonNode organisations;
+  private final JsonNode organisations;
 
   /**
    * @param json The organisations JSON data to export
@@ -35,23 +35,24 @@ public CsvExport(String json) {
    * @return The data for the given fields in CSV format
    */
   public String of(String fields) {
-    String csv = fields + "\n";
+    StringBuilder csv = new StringBuilder(fields + "\n");
     for (Iterator<JsonNode> iter = organisations.elements(); iter.hasNext();) {
       JsonNode org = iter.next();
-      csv += Arrays.asList(fields.split(",")).stream().map(field -> {
+      csv.append(Arrays.asList(fields.split(",")).stream().map(field -> {
         try {
           Object value = JsonPath.read(Configuration.defaultConfiguration()
               .jsonProvider().parse(org.toString()), "$." + field);
           return String.format("\"%s\"",
               value.toString().replaceAll("\"", "\"\""));
-        } catch (PathNotFoundException x) {
+        }
+        catch (PathNotFoundException x) {
           Logger.trace(x.getMessage());
           // https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#empty-and-quoted-cells
           return "";
         }
-      }).collect(Collectors.joining(",")) + "\n";
+      }).collect(Collectors.joining(","))).append("\n");
     }
-    return csv;
+    return csv.toString();
   }
 
 }
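
`of` now accumulates into a `StringBuilder` rather than rebuilding an immutable `String` on every `+=`, turning quadratic concatenation into linear appends; the quoting logic doubles embedded quotes per the CSV convention the linked W3C note describes. A small sketch of both pieces (the `quote` helper is illustrative, not part of the class):

    public class CsvQuoteDemo {
        // Quote a CSV cell: wrap in quotes and double any embedded quotes,
        // as in CsvExport.of(...).
        static String quote(Object value) {
            return String.format("\"%s\"", value.toString().replaceAll("\"", "\"\""));
        }

        public static void main(String[] args) {
            StringBuilder csv = new StringBuilder("name,city\n");
            csv.append(quote("Library \"Main\"")).append(",")
                    .append(quote("Köln")).append("\n");
            System.out.print(csv); // name,city then "Library ""Main""","Köln"
        }
    }
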
4 changes: 2 additions & 2 deletions app/transformation/GeoLookupMap.java
@@ -29,9 +29,9 @@ public class GeoLookupMap extends HashMap<String, String> {
       Application.CONFIG.getString("transformation.geo.lookup.server");
   private static final Double THRESHOLD =
       Application.CONFIG.getDouble("transformation.geo.lookup.threshold");
-  private LookupType lookupType;
+  private final LookupType lookupType;
 
-  static enum LookupType {
+  enum LookupType {
     LAT, LON
   }
 
25 changes: 14 additions & 11 deletions app/transformation/TransformAll.java
@@ -3,12 +3,13 @@
 package transformation;
 
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 
-import org.metafacture.metamorph.Metamorph;
+import org.metafacture.metafix.Metafix;
 import org.metafacture.elasticsearch.JsonToElasticsearchBulk;
 
 import controllers.Application;
@@ -50,9 +51,12 @@ public static void process(String startOfUpdates, int intervalSize,
     final String outputPath, String geoServer) throws IOException {
   String dbsOutput = outputPath + "-dbs";
   String sigelOutput = outputPath + "-sigel";
-  TransformSigel.process(startOfUpdates, intervalSize, sigelOutput,
-      geoServer);
-  TransformDbs.process(dbsOutput, geoServer);
+  TransformSigel.processBulk(sigelOutput, geoServer); // process the Sigel PICA binary bulk
+  TransformSigel.processUpdates(startOfUpdates, intervalSize, sigelOutput, geoServer); // process the Sigel PICA XML updates via OAI-PMH
+  TransformDbs.process(dbsOutput, geoServer); // process the DBS data
+
+  // DBS data, Sigel bulk and updates are joined into a single ES bulk file. DBS data comes first,
+  // so that ES prefers the Sigel entries that come later and overwrite DBS entries where available.
   try (FileWriter resultWriter = new FileWriter(outputPath)) {
     writeAll(dbsOutput, resultWriter);
     writeAll(sigelOutput, resultWriter);
@@ -72,19 +76,18 @@ private static void writeAll(String dbsOutput, FileWriter resultWriter)
   }
 
  static JsonToElasticsearchBulk esBulk() {
-    final JsonToElasticsearchBulk esBulk = new JsonToElasticsearchBulk("id",
+    return new JsonToElasticsearchBulk("id",
        Application.CONFIG.getString("index.es.type"),
        Application.CONFIG.getString("index.es.name"));
-    return esBulk;
  }
 
-  static Metamorph morphEnriched(String geoLookupServer) {
-    final Metamorph morphEnriched = new Metamorph("morph-enriched.xml");
+  static Metafix fixEnriched(String geoLookupServer) throws FileNotFoundException {
+    final Metafix fixEnriched = new Metafix("conf/fix-enriched.fix");
    if (geoLookupServer != null && !geoLookupServer.isEmpty()) {
-      morphEnriched.putMap("addLatMap", new GeoLookupMap(LookupType.LAT));
-      morphEnriched.putMap("addLongMap", new GeoLookupMap(LookupType.LON));
+      fixEnriched.putMap("addLatMap", new GeoLookupMap(LookupType.LAT));
+      fixEnriched.putMap("addLongMap", new GeoLookupMap(LookupType.LON));
    }
-    return morphEnriched;
+    return fixEnriched;
  }
 
 }
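
This last hunk is the change the branch name 'useFixInsteadOfMorph' refers to: the enrichment step is now driven by a Metafix script (`conf/fix-enriched.fix`) instead of a Metamorph XML (`morph-enriched.xml`), with lookup maps registered on the `Metafix` instance just as they were on `Metamorph`. A hedged sketch of that wiring, using only the calls visible in this diff; the map entries are illustrative, and a real pipeline would connect the `Metafix` stage to a reader upstream and a writer downstream:

    import java.io.FileNotFoundException;
    import java.util.HashMap;
    import java.util.Map;

    import org.metafacture.metafix.Metafix;

    public class FixWiringDemo {
        public static void main(String[] args) throws FileNotFoundException {
            // Load the Fix script, as in TransformAll.fixEnriched(...).
            Metafix fix = new Metafix("conf/fix-enriched.fix");

            // Register named lookup maps the script can reference;
            // the project passes GeoLookupMap instances here instead.
            Map<String, String> addLatMap = new HashMap<>();
            addLatMap.put("Köln, Jülicher Str. 6", "50.9406"); // illustrative entry
            fix.putMap("addLatMap", addLatMap);

            // fix is now ready to be used as a stage in a Metafacture pipeline.
        }
    }
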