diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterDuplicates.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterDuplicates.kt index c9ac3732f..399023118 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterDuplicates.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterDuplicates.kt @@ -5,6 +5,11 @@ import app.ehrenamtskarte.backend.stores.importer.logRemoveDuplicates import app.ehrenamtskarte.backend.stores.importer.types.AcceptingStore import org.slf4j.Logger +/** + * Filters and removes duplicates. + * For duplicates to be detected an exact match of name, postal code and street is necessary. + * The properties of the last accepting store are used if there are multiple valid properties. + */ class FilterDuplicates(private val logger: Logger) : PipelineStep, List>() { override fun execute(input: List): List { diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterLbe.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterLbe.kt index 15f5e95b8..a4b8cd165 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterLbe.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterLbe.kt @@ -7,6 +7,10 @@ import app.ehrenamtskarte.backend.stores.importer.matchesNa import app.ehrenamtskarte.backend.stores.importer.types.LbeAcceptingStore import org.slf4j.Logger +/** + * Filters and removes [LbeAcceptingStore] entries with invalid data. + * These are in particular stores without a name or location, or with an invalid category. 
+ */ class FilterLbe(private val logger: Logger): PipelineStep, List>() { private val invalidLocations = arrayOf("Musterhausen") diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/MapFromLbe.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/MapFromLbe.kt index b95ecf0f1..7a93ca58a 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/MapFromLbe.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/MapFromLbe.kt @@ -10,7 +10,12 @@ import app.ehrenamtskarte.backend.stores.importer.types.LbeAcceptingStore import org.apache.commons.text.StringEscapeUtils import org.slf4j.Logger +/** + * Maps [LbeAcceptingStore] to [AcceptingStore]. + * Properties are cleaned, decoded and converted to the correct types. + */ class MapFromLbe(private val logger: Logger) : PipelineStep, List>() { + override fun execute(input: List) = input.mapNotNull { try { AcceptingStore( diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/PostSanitizeFilter.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/PostSanitizeFilter.kt index 9475164ec..4f15e7425 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/PostSanitizeFilter.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/PostSanitizeFilter.kt @@ -9,6 +9,10 @@ import io.ktor.client.* import kotlinx.coroutines.runBlocking import org.slf4j.Logger +/** + * Filters [AcceptingStore] to prepare storing to the database. + * Stores without longitude, latitude or postal code, or outside the state's bounding box, are removed. 
+ */ class PostSanitizeFilter(private val logger: Logger, httpClient: HttpClient): PipelineStep, List>() { private val featureFetcher = FeatureFetcher(httpClient) diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeAddress.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeAddress.kt index 3be42a6c9..624720d74 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeAddress.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeAddress.kt @@ -7,6 +7,11 @@ import app.ehrenamtskarte.backend.stores.importer.types.AcceptingStore import org.intellij.lang.annotations.Language import org.slf4j.Logger +/** + * Sanitizes the addresses of the [AcceptingStore]. + * Postal codes are mapped to either the first five digits (German postcode format) or null. + * Street and house numbers are correctly separated. + */ class SanitizeAddress(private val logger: Logger) : PipelineStep, List>() { private val houseNumberRegex = houseNumberRegex() private val postalCodeRegex = Regex("""[0-9]{5}""") @@ -42,6 +47,12 @@ class SanitizeAddress(private val logger: Logger) : PipelineStep 'Untere Zell'|null, 'Am Römerbad 17'|'a' -> 'Am Römerbad'|'17 a', + * 'Rückermainstr. 2; 1.'|'OG' -> 'Rückermainstr.'|'2'|'1. 
OG' + */ private fun AcceptingStore.sanitizeStreetHouseNumber(): AcceptingStore { val isStreetPolluted = street?.find { it.isDigit() } != null val isHouseNumberPolluted = houseNumber != null && !houseNumberRegex.matches(houseNumber) @@ -73,6 +84,10 @@ class SanitizeAddress(private val logger: Logger) : PipelineStep '86150', 'Augsburg 86161 Rathausplatz' -> '86161', 'A-1234' -> null + */ private fun AcceptingStore.sanitizePostalCode(): AcceptingStore { val oldPostalCode = postalCode ?: return this diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeGeocode.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeGeocode.kt index cd79003ba..a2bdabed9 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeGeocode.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeGeocode.kt @@ -13,6 +13,11 @@ import org.geojson.Feature import org.geojson.Point import org.slf4j.Logger +/** + * Sanitize the postal code and the coordinates of the [AcceptingStore] using forward geocoding. + * If the coordinates are not inside the bounding box of the postal code, one of those is wrong. + * Then query by the address and use the coordinates OR postal code of the first match to sanitize the store data. + */ class SanitizeGeocode(private val logger: Logger, httpClient: HttpClient) : PipelineStep, List>() { private val featureFetcher = FeatureFetcher(httpClient) @@ -20,11 +25,6 @@ class SanitizeGeocode(private val logger: Logger, httpClient: HttpClient) : Pipe input.map { it.sanitize() } } - /** - * Sanitize the postal code and the coordinates of the [AcceptingStore] using forward geocoding. - * If the coordinates are not inside the bounding box of the postal code, one of those is wrong. - * Then query by the address and use the coordinates OR postal code of the first match to sanitize the store data. 
- */ private suspend fun AcceptingStore.sanitize(): AcceptingStore { if (street?.contains(STREET_EXCLUDE_PATTERN) == true) return this diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/Store.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/Store.kt index c54c867ff..cba850c99 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/Store.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/Store.kt @@ -9,6 +9,10 @@ import org.jetbrains.exposed.sql.transactions.transaction import org.postgis.Point import org.slf4j.Logger +/** + * Stores the given [AcceptingStore] to the database. + * Longitude, latitude and postal code of [AcceptingStore] must not be null. + */ class Store(private val logger: Logger, private val manualImport: Boolean) : PipelineStep, Unit>() { override fun execute(input: List) { @@ -20,13 +24,9 @@ class Store(private val logger: Logger, private val manualImport: Boolean) : Pip Addresses.deleteAll() input.forEachIndexed { done, acceptingStore -> - if (acceptingStore.postalCode == null) { - logger.info("Skipping '${acceptingStore.name}' because its postal code is null.") - return@forEachIndexed - } val address = AddressEntity.new { street = acceptingStore.streetWithHouseNumber - postalCode = acceptingStore.postalCode + postalCode = acceptingStore.postalCode!! locaction = acceptingStore.location countryCode = acceptingStore.countryCode }