Skip to content

Commit

Permalink
Delete NULL strings from DBS data with Metafacture (MF) #478
Browse files (browse the repository at this point in the history)
  • Loading branch information
TobiasNx committed Aug 10, 2023
1 parent f63f951 commit f8c5f5a
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 2 deletions.
5 changes: 5 additions & 0 deletions app/transformation/TransformDbs.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.metafacture.json.JsonEncoder;
import org.metafacture.io.LineReader;
import org.metafacture.io.ObjectWriter;
import org.metafacture.strings.StringMatcher;
import org.metafacture.io.FileOpener;
import org.metafacture.metafix.Metafix;
import java.io.FileNotFoundException;
Expand All @@ -21,12 +22,16 @@ public class TransformDbs {
static void process(final String outputPath, String geoLookupServer) throws FileNotFoundException {
final FileOpener opener = new FileOpener();
opener.setEncoding("UTF-8");
final StringMatcher matcher = new StringMatcher();
matcher.setPattern("NULL");
matcher.setReplacement("");
final CsvDecoder decoder = new CsvDecoder(',');
decoder.setHasHeader(true);
JsonEncoder encodeJson = new JsonEncoder();
encodeJson.setPrettyPrinting(true);
opener//
.setReceiver(new LineReader())//
.setReceiver(matcher)//
.setReceiver(decoder)//
.setReceiver(new Metafix("conf/fix-dbs.fix"))// Fix skips all records that have no "inr"
.setReceiver(TransformAll.fixEnriched(geoLookupServer))//
Expand Down
1 change: 1 addition & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ libraryDependencies ++= Seq(
"org.metafacture" % "metafacture-biblio" % "5.7.0-rc1",
"org.metafacture" % "metafacture-xml" % "5.7.0-rc1",
"org.metafacture" % "metafacture-framework" % "5.7.0-rc1",
"org.metafacture" % "metafacture-strings" % "5.7.0-rc1",
"org.metafacture" % "metafix" % "0.6.0-SNAPSHOT",
"org.xbib.elasticsearch.plugin" % "elasticsearch-plugin-bundle" % "2.3.2.0",
"com.jayway.jsonpath" % "json-path" % "2.2.0",
Expand Down
2 changes: 1 addition & 1 deletion test/transformation/output/enriched-test.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
{"index":{"_index":"organisations","_type":"organisation","_id":"http://lobid.org/organisations/DE-514#!"}}
{"rs":"120630252252","type":"Library","classification":{"id":"http://purl.org/lobid/libtype#n33","type":"Concept","label":{"de":"Öffentliche Bibliothek","en":"Public Library"}},"mainEntityOfPage":{"id":"http://lobid.org/organisations/DE-514#!","wasGeneratedBy":{"type":"Activity","used":[{"sourceOrganisation":{"id":"https://www.bibliotheksstatistik.de/","label":"Deutsche Bibliotheksstatistik (DBS)"}}]}},"@context":"http://lobid.org/organisations/context.jsonld","url":"http://www.stadtbibliothek-rathenow.de","librariesOrgID":"203084","name":"Stadtbibliothek Rathenow","containedIn":"http://sws.geonames.org/6550611/","location":[{"address":{"streetAddress":"Schleusenplatz 4","addressLocality":"Rathenow","postalCode":"14712","addressCountry":"DE","type":"PostalAddress"},"openingHoursSpecification":{"description":"Mo, Di, Do: 10.00-18.00, Fr: 10.00-14.00","type":"OpeningHoursSpecification"},"type":"Place"}],"id":"http://lobid.org/organisations/DE-514#!","isil":"DE-514","fundertype":{"id":"http://purl.org/lobid/fundertype#n04","type":"Concept","label":{"de":"Gemeinde","en":"Commune"}},"collects":{"type":"Collection","extent":{"id":"http://purl.org/lobid/stocksize#n07","type":"Concept","label":{"de":"30.001 - 100.000","en":"30,001 - 100,000"}}},"dbsID":"AA514","sameAs":["http://www.wikidata.org/entity/Q28682011","https://ld.zdb-services.de/resource/organisations/DE-514","https://librarytechnology.org/library/203084"]}
{"index":{"_index":"organisations","_type":"organisation","_id":"http://lobid.org/organisations/DBS-AA600#!"}}
{"rs":"071405007144","type":"Library","classification":{"id":"http://purl.org/lobid/libtype#n36","type":"Concept","label":{"de":"Öffentliche Bibliothek für besondere Benutzergruppen","en":"Public Library for Special User Groups"}},"mainEntityOfPage":{"id":"http://lobid.org/organisations/DBS-AA600#!","wasGeneratedBy":{"type":"Activity","used":[{"sourceOrganisation":{"id":"https://www.bibliotheksstatistik.de/","label":"Deutsche Bibliotheksstatistik (DBS)"}}]}},"@context":"http://lobid.org/organisations/context.jsonld","url":"NULL","librariesOrgID":"206333","name":"Patientenbücherei in der Hunsrück Klinik der Kreuznacher Diakonie","location":[{"address":{"streetAddress":"Holzbacher Str. 1","addressLocality":"Simmern","postalCode":"55469","addressCountry":"DE","type":"PostalAddress"},"type":"Place"}],"id":"http://lobid.org/organisations/DBS-AA600#!","fundertype":{"id":"http://purl.org/lobid/fundertype#n09","type":"Concept","label":{"de":"Evangelische Kirche","en":"Protestant Church"}},"collects":{"type":"Collection","extent":{"id":"http://purl.org/lobid/stocksize#n01","type":"Concept","label":{"de":"bis 1.000","en":"up to 1,000"}}},"dbsID":"AA600","sameAs":["https://librarytechnology.org/library/206333"]}
{"rs":"071405007144","type":"Library","classification":{"id":"http://purl.org/lobid/libtype#n36","type":"Concept","label":{"de":"Öffentliche Bibliothek für besondere Benutzergruppen","en":"Public Library for Special User Groups"}},"mainEntityOfPage":{"id":"http://lobid.org/organisations/DBS-AA600#!","wasGeneratedBy":{"type":"Activity","used":[{"sourceOrganisation":{"id":"https://www.bibliotheksstatistik.de/","label":"Deutsche Bibliotheksstatistik (DBS)"}}]}},"@context":"http://lobid.org/organisations/context.jsonld","librariesOrgID":"206333","name":"Patientenbücherei in der Hunsrück Klinik der Kreuznacher Diakonie","location":[{"address":{"streetAddress":"Holzbacher Str. 1","addressLocality":"Simmern","postalCode":"55469","addressCountry":"DE","type":"PostalAddress"},"type":"Place"}],"id":"http://lobid.org/organisations/DBS-AA600#!","fundertype":{"id":"http://purl.org/lobid/fundertype#n09","type":"Concept","label":{"de":"Evangelische Kirche","en":"Protestant Church"}},"collects":{"type":"Collection","extent":{"id":"http://purl.org/lobid/stocksize#n01","type":"Concept","label":{"de":"bis 1.000","en":"up to 1,000"}}},"dbsID":"AA600","sameAs":["https://librarytechnology.org/library/206333"]}
{"index":{"_index":"organisations","_type":"organisation","_id":"http://lobid.org/organisations/DBS-DB675#!"}}
{"rs":"032540021021","type":"Library","classification":{"id":"http://purl.org/lobid/libtype#n81","type":"Concept","label":{"de":"Wissenschaftliche Spezialbibliothek","en":"Academic Special Library"}},"mainEntityOfPage":{"id":"http://lobid.org/organisations/DBS-DB675#!","wasGeneratedBy":{"type":"Activity","used":[{"sourceOrganisation":{"id":"https://www.bibliotheksstatistik.de/","label":"Deutsche Bibliotheksstatistik (DBS)"}}]}},"@context":"http://lobid.org/organisations/context.jsonld","url":"http://www.stadtarchiv-hildesheim.de","provides":"https://webopac.stadt-hildesheim.de/libero/WebOpac.cls","name":"Roemer-Museum, Bibliothek","location":[{"address":{"streetAddress":"Am Steine 1/2","addressLocality":"Hildesheim","postalCode":"31134","addressCountry":"DE","type":"PostalAddress"},"openingHoursSpecification":{"description":"Di. Mi.: 09.00 - 16.00 Uhr. Do: 09.00 - 18.00 Uhr","type":"OpeningHoursSpecification"},"type":"Place"}],"id":"http://lobid.org/organisations/DBS-DB675#!","fundertype":{"id":"http://purl.org/lobid/fundertype#n04","type":"Concept","label":{"de":"Gemeinde","en":"Commune"}},"collects":{"type":"Collection","extent":{"id":"http://purl.org/lobid/stocksize#n07","type":"Concept","label":{"de":"30.001 - 100.000","en":"30,001 - 100,000"}}},"dbsID":"DB675"}
{"index":{"_index":"organisations","_type":"organisation","_id":"http://lobid.org/organisations/DE-38M#!"}}
Expand Down
2 changes: 1 addition & 1 deletion test/transformation/output/enriched-test.json-dbs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,6 @@
{"index":{"_index":"organisations","_type":"organisation","_id":"http://lobid.org/organisations/DE-514#!"}}
{"rs":"120630252252","type":"Library","classification":{"id":"http://purl.org/lobid/libtype#n33","type":"Concept","label":{"de":"Öffentliche Bibliothek","en":"Public Library"}},"mainEntityOfPage":{"id":"http://lobid.org/organisations/DE-514#!","wasGeneratedBy":{"type":"Activity","used":[{"sourceOrganisation":{"id":"https://www.bibliotheksstatistik.de/","label":"Deutsche Bibliotheksstatistik (DBS)"}}]}},"@context":"http://lobid.org/organisations/context.jsonld","url":"http://www.stadtbibliothek-rathenow.de","librariesOrgID":"203084","name":"Stadtbibliothek Rathenow","containedIn":"http://sws.geonames.org/6550611/","location":[{"address":{"streetAddress":"Schleusenplatz 4","addressLocality":"Rathenow","postalCode":"14712","addressCountry":"DE","type":"PostalAddress"},"openingHoursSpecification":{"description":"Mo, Di, Do: 10.00-18.00, Fr: 10.00-14.00","type":"OpeningHoursSpecification"},"type":"Place"}],"id":"http://lobid.org/organisations/DE-514#!","isil":"DE-514","fundertype":{"id":"http://purl.org/lobid/fundertype#n04","type":"Concept","label":{"de":"Gemeinde","en":"Commune"}},"collects":{"type":"Collection","extent":{"id":"http://purl.org/lobid/stocksize#n07","type":"Concept","label":{"de":"30.001 - 100.000","en":"30,001 - 100,000"}}},"dbsID":"AA514","sameAs":["http://www.wikidata.org/entity/Q28682011","https://ld.zdb-services.de/resource/organisations/DE-514","https://librarytechnology.org/library/203084"]}
{"index":{"_index":"organisations","_type":"organisation","_id":"http://lobid.org/organisations/DBS-AA600#!"}}
{"rs":"071405007144","type":"Library","classification":{"id":"http://purl.org/lobid/libtype#n36","type":"Concept","label":{"de":"Öffentliche Bibliothek für besondere Benutzergruppen","en":"Public Library for Special User Groups"}},"mainEntityOfPage":{"id":"http://lobid.org/organisations/DBS-AA600#!","wasGeneratedBy":{"type":"Activity","used":[{"sourceOrganisation":{"id":"https://www.bibliotheksstatistik.de/","label":"Deutsche Bibliotheksstatistik (DBS)"}}]}},"@context":"http://lobid.org/organisations/context.jsonld","url":"NULL","librariesOrgID":"206333","name":"Patientenbücherei in der Hunsrück Klinik der Kreuznacher Diakonie","location":[{"address":{"streetAddress":"Holzbacher Str. 1","addressLocality":"Simmern","postalCode":"55469","addressCountry":"DE","type":"PostalAddress"},"type":"Place"}],"id":"http://lobid.org/organisations/DBS-AA600#!","fundertype":{"id":"http://purl.org/lobid/fundertype#n09","type":"Concept","label":{"de":"Evangelische Kirche","en":"Protestant Church"}},"collects":{"type":"Collection","extent":{"id":"http://purl.org/lobid/stocksize#n01","type":"Concept","label":{"de":"bis 1.000","en":"up to 1,000"}}},"dbsID":"AA600","sameAs":["https://librarytechnology.org/library/206333"]}
{"rs":"071405007144","type":"Library","classification":{"id":"http://purl.org/lobid/libtype#n36","type":"Concept","label":{"de":"Öffentliche Bibliothek für besondere Benutzergruppen","en":"Public Library for Special User Groups"}},"mainEntityOfPage":{"id":"http://lobid.org/organisations/DBS-AA600#!","wasGeneratedBy":{"type":"Activity","used":[{"sourceOrganisation":{"id":"https://www.bibliotheksstatistik.de/","label":"Deutsche Bibliotheksstatistik (DBS)"}}]}},"@context":"http://lobid.org/organisations/context.jsonld","librariesOrgID":"206333","name":"Patientenbücherei in der Hunsrück Klinik der Kreuznacher Diakonie","location":[{"address":{"streetAddress":"Holzbacher Str. 1","addressLocality":"Simmern","postalCode":"55469","addressCountry":"DE","type":"PostalAddress"},"type":"Place"}],"id":"http://lobid.org/organisations/DBS-AA600#!","fundertype":{"id":"http://purl.org/lobid/fundertype#n09","type":"Concept","label":{"de":"Evangelische Kirche","en":"Protestant Church"}},"collects":{"type":"Collection","extent":{"id":"http://purl.org/lobid/stocksize#n01","type":"Concept","label":{"de":"bis 1.000","en":"up to 1,000"}}},"dbsID":"AA600","sameAs":["https://librarytechnology.org/library/206333"]}
{"index":{"_index":"organisations","_type":"organisation","_id":"http://lobid.org/organisations/DBS-DB675#!"}}
{"rs":"032540021021","type":"Library","classification":{"id":"http://purl.org/lobid/libtype#n81","type":"Concept","label":{"de":"Wissenschaftliche Spezialbibliothek","en":"Academic Special Library"}},"mainEntityOfPage":{"id":"http://lobid.org/organisations/DBS-DB675#!","wasGeneratedBy":{"type":"Activity","used":[{"sourceOrganisation":{"id":"https://www.bibliotheksstatistik.de/","label":"Deutsche Bibliotheksstatistik (DBS)"}}]}},"@context":"http://lobid.org/organisations/context.jsonld","url":"http://www.stadtarchiv-hildesheim.de","provides":"https://webopac.stadt-hildesheim.de/libero/WebOpac.cls","name":"Roemer-Museum, Bibliothek","location":[{"address":{"streetAddress":"Am Steine 1/2","addressLocality":"Hildesheim","postalCode":"31134","addressCountry":"DE","type":"PostalAddress"},"openingHoursSpecification":{"description":"Di. Mi.: 09.00 - 16.00 Uhr. Do: 09.00 - 18.00 Uhr","type":"OpeningHoursSpecification"},"type":"Place"}],"id":"http://lobid.org/organisations/DBS-DB675#!","fundertype":{"id":"http://purl.org/lobid/fundertype#n04","type":"Concept","label":{"de":"Gemeinde","en":"Commune"}},"collects":{"type":"Collection","extent":{"id":"http://purl.org/lobid/stocksize#n07","type":"Concept","label":{"de":"30.001 - 100.000","en":"30,001 - 100,000"}}},"dbsID":"DB675"}

0 comments on commit f8c5f5a

Please sign in to comment.