Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not filter locations which do not have coordinates #461

Merged
merged 7 commits into from
Jan 21, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,27 @@
package app.ehrenamtskarte.backend.stores.geocoding

import app.ehrenamtskarte.backend.stores.importer.types.AcceptingStore
import org.geojson.Feature
import kotlin.math.*

/**
 * Distance threshold, in kilometres, that [isCloseToBoundingBox] passes to `isCloseTo`
 * when deciding whether a store counts as close to a feature's bounding box.
 */
const val DISTANCE_THRESHOLD_IN_KM = 1.0

/**
 * Checks whether this accepting store lies within [DISTANCE_THRESHOLD_IN_KM] of the
 * bounding box of [feature].
 *
 * A store without a latitude or longitude is never considered close.
 *
 * @param feature the feature whose bounding box is handed to [isCloseTo]
 * @return `false` if either coordinate is missing, otherwise the result of [isCloseTo]
 */
fun AcceptingStore.isCloseToBoundingBox(feature: Feature): Boolean {
    // Bail out early when a coordinate is missing; there is nothing to measure against.
    val lat = latitude ?: return false
    val lon = longitude ?: return false
    return isCloseTo(feature.bbox, lat, lon, DISTANCE_THRESHOLD_IN_KM)
}

/**
 * Checks whether this accepting store is positioned inside the bounding box [bbox]
 * (borders included).
 *
 * The longitude is compared against `bbox[0]` (lower bound) and `bbox[2]` (upper bound),
 * the latitude against `bbox[1]` (lower bound) and `bbox[3]` (upper bound).
 * A store without a latitude or longitude is never inside a bounding box.
 *
 * @param bbox the bounding box, indexed as described above
 * @return `true` only if both coordinates are present and within the respective bounds
 */
fun AcceptingStore.isInBoundingBox(bbox: DoubleArray): Boolean {
    val lat = latitude ?: return false
    val lon = longitude ?: return false

    // Same comparison order as before so short-circuiting touches the array identically.
    return lon >= bbox[0] && lon <= bbox[2] && lat >= bbox[1] && lat <= bbox[3]
}

/**
* Returns whether [latitude] and [longitude] are closer than [thresholdInKm] to the bounding box [bbox]
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ object DataImporter {
val logger = LoggerFactory.getLogger(DataImporter::class.java)
val pipe = {
Unit.addStep(Download(logger, httpClient), logger) { logger.info("== Download raw data ==" )}
.addStep(Filter(logger), logger) { logger.info("== Filter raw data ==") }
.addStep(PreSanitizeFilter(logger), logger) { logger.info("== Filter raw data ==") }
.addStep(Map(logger), logger) { logger.info("== Map raw to internal data ==") }
.addStep(Sanitize(logger, httpClient), logger) { logger.info("== Sanitize data ==") }
.addStep(PostSanitizeFilter(logger, httpClient), logger) { logger.info("== Filter sanitized data ==") }
.addStep(Encode(logger), logger) { logger.info("== Handle encoding issues ==") }
.addStep(Store(logger, manualImport), logger) { logger.info("== Store remaining data to db ==") }
}
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ class Map(private val logger: Logger) : PipelineStep<List<LbeAcceptingStore>, Li
cleanPostalCode(it.postalCode),
it.street.clean(),
it.houseNumber.clean(),
it.longitude!!.replace(",", ".").toDouble(),
it.latitude!!.replace(",", ".").toDouble(),
it.longitude.safeToDouble(),
it.latitude.safeToDouble(),
categoryId(it.category!!),
it.email.clean(),
it.telephone.clean(),
Expand All @@ -35,6 +35,10 @@ class Map(private val logger: Logger) : PipelineStep<List<LbeAcceptingStore>, Li
}
}

/**
 * Null-safe conversion of a raw coordinate string to a [Double].
 *
 * The receiver is passed through `clean()`, a decimal comma is replaced by a decimal point
 * and the result is parsed with [String.toDouble].
 *
 * @return `null` if the receiver (or an intermediate result) is `null`, the parsed number otherwise
 * @throws NumberFormatException if the normalized text is not a valid number
 */
private fun String?.safeToDouble(): Double? {
    val normalized = this?.clean()?.replace(",", ".") ?: return null
    return normalized.toDouble()
}

private fun categoryId(category: String): Int {
val int = category.toInt()
return if (int == ALTERNATIVE_MISCELLANEOUS_CATEGORY) MISCELLANEOUS_CATEGORY else int
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package app.ehrenamtskarte.backend.stores.importer.steps

import app.ehrenamtskarte.backend.stores.geocoding.FeatureFetcher
import app.ehrenamtskarte.backend.common.STATE
import app.ehrenamtskarte.backend.stores.geocoding.isInBoundingBox
import app.ehrenamtskarte.backend.stores.importer.PipelineStep
import app.ehrenamtskarte.backend.stores.importer.types.AcceptingStore
import io.ktor.client.*
import kotlinx.coroutines.runBlocking
import org.slf4j.Logger

/**
 * Pipeline step that runs after sanitizing and drops every store that
 *  - has no longitude or latitude,
 *  - is not inside the bounding box of [STATE], or
 *  - has no postal code.
 *
 * Each dropped store is logged at info level together with the reason.
 */
class PostSanitizeFilter(private val logger: Logger, httpClient: HttpClient) : PipelineStep<List<AcceptingStore>, List<AcceptingStore>>() {
    private val featureFetcher = FeatureFetcher(httpClient)

    /**
     * Queries the features for [STATE], takes the bounding box of the first one and
     * returns the stores of [input] that pass all checks, in their original order.
     * `first()` fails if the query returns no feature at all.
     */
    override fun execute(input: List<AcceptingStore>): List<AcceptingStore> = runBlocking {
        val stateFeature = featureFetcher.queryFeatures(listOf("state" to STATE)).first()
        val stateBbox = stateFeature.bbox

        input.filter { store ->
            when {
                store.longitude == null || store.latitude == null -> {
                    logger.info("'${store.name}, ${store.location}' was filtered out because longitude or latitude are null")
                    false
                }
                !store.isInBoundingBox(stateBbox) -> {
                    logger.info("'${store.name}, ${store.location}' was filtered out because it is outside of $STATE")
                    false
                }
                store.postalCode == null -> {
                    // Most likely the store is outside of the state while still being inside the state's bounding box
                    logger.info("'${store.name}, ${store.location}' was filtered out because its postal code is null")
                    false
                }
                else -> true
            }
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package app.ehrenamtskarte.backend.stores.importer.steps

import app.ehrenamtskarte.backend.stores.importer.PipelineStep
import app.ehrenamtskarte.backend.stores.importer.matchesNa
import app.ehrenamtskarte.backend.stores.importer.types.LbeAcceptingStore
import org.slf4j.Logger

/**
 * Pipeline step that runs on the raw import data and drops every [LbeAcceptingStore]
 * whose name, category or location is unusable.
 *
 * Each dropped store is logged at info level together with the reason.
 */
class PreSanitizeFilter(private val logger: Logger): PipelineStep<List<LbeAcceptingStore>, List<LbeAcceptingStore>>() {
    private val invalidLocations = arrayOf("Musterhausen")

    // Built once per step instance rather than once per store: all ids from 0 up to
    // MISCELLANEOUS_CATEGORY plus the alternative id for the miscellaneous category.
    private val validCategories = (0..MISCELLANEOUS_CATEGORY) + listOf(ALTERNATIVE_MISCELLANEOUS_CATEGORY)

    /**
     * Returns the stores of [input] that pass [filterLbe], keeping their original order.
     */
    override fun execute(input: List<LbeAcceptingStore>): List<LbeAcceptingStore> = input.filter { filterLbe(it) }

    /**
     * Returns whether [store] has a valid name, category and location.
     * The checks short-circuit in that order; a store that triggers an exception
     * while being checked is logged and dropped instead of aborting the import.
     */
    private fun filterLbe(store: LbeAcceptingStore) = try {
        store.isValidName() && store.isValidCategory() && store.isValidLocation()
    } catch (e: Exception) {
        logger.info("$store was filtered out because of an unknown exception while filtering", e)
        false
    }

    /**
     * Returns whether the receiver is `null`, blank or matched by `matchesNa`.
     */
    private fun String?.isUnavailable(): Boolean {
        return this.isNullOrBlank() || matchesNa(this)
    }

    /**
     * Returns whether the store has a usable name.
     * The whole store is logged on failure because the name itself is not usable as an identifier.
     */
    private fun LbeAcceptingStore.isValidName(): Boolean {
        if (name.isUnavailable()) {
            logger.info("'$this' was filtered out because name '$name' is invalid")
            return false
        }
        return true
    }

    /**
     * Returns whether the store has a usable location that is not listed in [invalidLocations].
     */
    private fun LbeAcceptingStore.isValidLocation(): Boolean {
        if (location.isUnavailable() || invalidLocations.contains(location)) {
            logger.info("'$name' was filtered out because location '$location' is invalid")
            return false
        }
        return true
    }

    /**
     * Returns whether the store's category parses to an integer contained in [validCategories].
     * A missing or non-numeric category is invalid.
     */
    private fun LbeAcceptingStore.isValidCategory(): Boolean {
        val valid = category?.toIntOrNull() in validCategories

        if (!valid) {
            logger.info("'$name' was filtered out because category '$category' is invalid")
        }

        return valid
    }

}
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package app.ehrenamtskarte.backend.stores.importer.steps

import app.ehrenamtskarte.backend.common.STATE
import app.ehrenamtskarte.backend.stores.geocoding.FeatureFetcher
import app.ehrenamtskarte.backend.stores.geocoding.isCloseTo
import app.ehrenamtskarte.backend.stores.geocoding.isCloseToBoundingBox
import app.ehrenamtskarte.backend.stores.geocoding.isInBoundingBox
import app.ehrenamtskarte.backend.stores.importer.PipelineStep
import app.ehrenamtskarte.backend.stores.importer.types.AcceptingStore
import io.ktor.client.HttpClient
Expand All @@ -11,29 +11,14 @@ import org.geojson.Feature
import org.geojson.Point
import org.slf4j.Logger

const val DISTANCE_THRESHOLD_IN_KM = 1.0

// Postal code lookup fails/does not really make sense for a "Postfach"
const val STREET_EXCLUDE_PATTERN = "Postfach"

class Sanitize(private val logger: Logger, httpClient: HttpClient) : PipelineStep<List<AcceptingStore>, List<AcceptingStore>>() {
private val featureFetcher = FeatureFetcher(httpClient)

override fun execute(input: List<AcceptingStore>): List<AcceptingStore> = runBlocking {
val stateBbox = featureFetcher.queryFeatures(listOf(Pair("state", STATE))).first().bbox

input.map { it.sanitize() }.filter {
if (!it.isInBoundingBox(stateBbox)) {
logger.info("'${it.name}, ${it.location}' was filtered out because it is outside of $STATE")
return@filter false
}
if (it.postalCode == null) {
// Probably because it is outside of the state but inside the bounding box of the state
logger.info("'${it.name}, ${it.location}' was filtered out because its postal code is null")
return@filter false
}
true
}
input.map { it.sanitize() }
}

/**
Expand All @@ -44,7 +29,6 @@ class Sanitize(private val logger: Logger, httpClient: HttpClient) : PipelineSte
private suspend fun AcceptingStore.sanitize(): AcceptingStore {
if (street?.contains(STREET_EXCLUDE_PATTERN) == true) return this


val postalCodeBbox = if (postalCode != null) {
featureFetcher.queryFeatures(listOf(Pair("postalcode", postalCode))).firstOrNull()?.bbox
} else null
Expand Down Expand Up @@ -75,14 +59,6 @@ class Sanitize(private val logger: Logger, httpClient: HttpClient) : PipelineSte
return this
}

/**
 * Returns the result of [isCloseTo] for the bounding box of [feature], the store's
 * latitude and longitude, and [DISTANCE_THRESHOLD_IN_KM] as the threshold.
 */
private fun AcceptingStore.isCloseToBoundingBox(feature: Feature): Boolean {
return isCloseTo(feature.bbox, latitude, longitude, DISTANCE_THRESHOLD_IN_KM)
}

/**
 * Returns whether the store's coordinates lie inside [bbox] (borders included):
 * the longitude must be between `bbox[0]` and `bbox[2]`, the latitude between `bbox[1]` and `bbox[3]`.
 */
private fun AcceptingStore.isInBoundingBox(bbox: DoubleArray): Boolean {
return bbox[0] <= longitude && longitude <= bbox[2] && bbox[1] <= latitude && latitude <= bbox[3]
}

private fun logChange(store: AcceptingStore, property: String, oldValue: String?, newValue: String?) {
val storeInfo = listOfNotNull(store.name, store.location, store.street, store.houseNumber).joinToString()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class Store(private val logger: Logger, private val manualImport: Boolean) : Pip
PhysicalStoreEntity.new {
storeId = storeEntity.id
addressId = address.id
coordinates = Point(acceptingStore.longitude, acceptingStore.latitude)
coordinates = Point(acceptingStore.longitude!!, acceptingStore.latitude!!)
steffenkleinle marked this conversation as resolved.
Show resolved Hide resolved
}
if (manualImport)
drawSuccessBar(done, input.size)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ data class AcceptingStore(
val postalCode: String?,
val street: String?,
val houseNumber: String?,
val longitude: Double,
val latitude: Double,
val longitude: Double?,
val latitude: Double?,
val categoryId: Int,
val email: String?,
val telephone: String?,
Expand Down