Skip to content

Commit

Permalink
Fix problems introduced by duplicates (#137)
Browse files Browse the repository at this point in the history
* Throw an exception when duplicates are found

* When duplicate product refs are found, append "-dedupe" to the duplicated ref.

* Change the way product refs are being generated.

Add a random suffix to each generated ref to avoid conflicts.
  • Loading branch information
almet authored Nov 10, 2022
1 parent 2b0459e commit ced93b0
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 1 deletion.
42 changes: 42 additions & 0 deletions copanier/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import inspect
import threading
import uuid
from collections import Counter
from datetime import datetime, timedelta
from dataclasses import dataclass, field, asdict
from pathlib import Path
Expand Down Expand Up @@ -425,11 +426,52 @@ def load(cls, id):
path = cls.get_root() / f"{id}.yml"
if not path.exists():
raise DoesNotExist

def _dedupe_products(raw_data):
"""On some rare occasions, different products get
the same identifier (ref).
This function finds them and appends "-dedupe" to it.
This is not ideal but fixes the problem before it causes more
trouble (such as https://github.com/spiral-project/copanier/issues/136)
This function returns True if dupes have been found.
"""
if ('products' not in raw_data) or len(raw_data['products']) < 1:
return False

products = raw_data['products']

counter = Counter([p['ref'] for p in products])
most_common = counter.most_common(1)[0]
number_of_dupes = most_common[1]

if number_of_dupes < 2:
return False

dupe_id = most_common[0]
# Reconstruct the products list but change the duplicated ID.
counter = 0
new_products = []
for product in products:
ref = product['ref']
if ref == dupe_id:
counter = counter + 1
if counter == number_of_dupes: # Only change the last occurence.
product['ref'] = f'{ref}-dedupe'
new_products.append(product)
raw_data['products'] = new_products
return True

data = yaml.safe_load(path.read_text())
dupe_found = _dedupe_products(data)
# Tolerate extra fields (but we'll lose them if instance is persisted)
data = {k: v for k, v in data.items() if k in cls.__dataclass_fields__}
delivery = cls(**data)
delivery.id = id

if dupe_found:
delivery.persist()
return delivery

@classmethod
Expand Down
6 changes: 5 additions & 1 deletion copanier/views/products.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from datetime import datetime

import random
import string

from slugify import slugify
from .core import app
from ..models import Delivery, Product, Producer
Expand Down Expand Up @@ -153,7 +156,8 @@ async def create_product(request, response, delivery_id, producer_id):
product.producer = producer_id
form = request.form
product.update_from_form(form)
product.ref = slugify(f"{producer_id}-{product.name}-{product.unit}")
random_string = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
product.ref = slugify(f"{producer_id}-{product.name}-{product.unit}-{random_string}")

delivery.products.append(product)
delivery.persist()
Expand Down

0 comments on commit ced93b0

Please sign in to comment.