Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor privacy declaration upsert to maintain constant IDs #3188

Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 50 additions & 15 deletions src/fides/api/ctl/routes/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@ def get_system(db: Session, fides_key: str) -> System:
return system


async def validate_privacy_declarations_data_uses(
db: AsyncSession, system: SystemSchema
) -> None:
async def validate_privacy_declarations(db: AsyncSession, system: SystemSchema) -> None:
"""
Ensure that the `PrivacyDeclaration`s on the provided `System` resource reference
valid `DataUse` records.
Ensure that the `PrivacyDeclaration`s on the provided `System` resource are valid:
- that they reference valid `DataUse` records
- that there are not "duplicate" `PrivacyDeclaration`s as defined by their "logical ID"

If not, a `400` is raised
"""
logical_ids = set()
for privacy_declaration in system.privacy_declarations:
try:
await get_resource(
Expand All @@ -88,6 +88,14 @@ async def validate_privacy_declarations_data_uses(
status_code=HTTP_400_BAD_REQUEST,
detail=f"Invalid privacy declaration referencing unknown DataUse {privacy_declaration.data_use}",
)
logical_id = privacy_declaration_logical_id(privacy_declaration)
if logical_id in logical_ids:
adamsachs marked this conversation as resolved.
Show resolved Hide resolved
raise HTTPException(
status_code=HTTP_400_BAD_REQUEST,
detail=f"Duplicate privacy declarations specified with data use {privacy_declaration.data_use}",
)

logical_ids.add(logical_id)


@system_connections_router.get(
Expand Down Expand Up @@ -162,7 +170,7 @@ async def update(
Update a System by the fides_key extracted from the request body. Defined outside of the crud routes
to add additional "system manager" permission checks.
"""
await validate_privacy_declarations_data_uses(db, resource)
await validate_privacy_declarations(db, resource)
return await update_system(resource, db)


Expand All @@ -174,7 +182,7 @@ async def upsert_system(
updated = 0
# first pass to validate privacy declarations before proceeding
for resource in resources:
await validate_privacy_declarations_data_uses(db, resource)
await validate_privacy_declarations(db, resource)

for resource in resources:
try:
Expand Down Expand Up @@ -219,21 +227,48 @@ async def upsert(
}


def privacy_declaration_logical_id(
privacy_declaration: PrivacyDeclaration,
) -> str:
"""
Helper to standardize a logical 'id' for privacy declarations.
As of now, this is based on the `data_use` and the `name` of the declaration, if provided.
"""
return f"{privacy_declaration.data_use}:{privacy_declaration.name or ''}"


async def upsert_privacy_declarations(
db: AsyncSession, resource: SystemSchema, system: System
) -> None:
"""Helper to handle the specific upsert logic for privacy declarations"""

async with db.begin():
# clear out any existing privacy declarations on the System
for existing_declaration in system.privacy_declarations:
await db.delete(existing_declaration)

# iterate through declarations specified on the request and create them
# map existing declarations by their logical identifier
existing_declarations: Dict[str, PrivacyDeclaration] = {
privacy_declaration_logical_id(existing_declaration): existing_declaration
for existing_declaration in system.privacy_declarations
}

# iterate through declarations specified on the request and upsert
# looking for "matching" existing declarations based on data_use and name
for privacy_declaration in resource.privacy_declarations:
# prepare our 'payload' for either create or update
data = privacy_declaration.dict()
data["system_id"] = system.id # add FK back to system
PrivacyDeclaration.create(db, data=data)
data["system_id"] = system.id # include FK back to system

# if we find matching declaration, remove it from our map
if existing_declaration := existing_declarations.pop(
privacy_declaration_logical_id(privacy_declaration), None
):
# and update existing declaration *in place*
existing_declaration.update(db, data=data)
else:
# otherwise, create a new declaration record
PrivacyDeclaration.create(db, data=data)
adamsachs marked this conversation as resolved.
Show resolved Hide resolved

# delete any existing privacy declarations that have not been "matched" in the request
for existing_declarations in existing_declarations.values():
await db.delete(existing_declarations)
adamsachs marked this conversation as resolved.
Show resolved Hide resolved


async def update_system(resource: SystemSchema, db: AsyncSession) -> Dict:
Expand Down Expand Up @@ -322,7 +357,7 @@ async def create(
Override `System` create/POST to handle `.privacy_declarations` defined inline,
for backward compatibility and ease of use for API users.
"""
await validate_privacy_declarations_data_uses(db, resource)
await validate_privacy_declarations(db, resource)
# copy out the declarations to be stored separately
# as they will be processed AFTER the system is added
privacy_declarations = resource.privacy_declarations
Expand Down
25 changes: 25 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,31 @@ def system(db: Session) -> System:
return system


@pytest.fixture(scope="function")
def system_multiple_decs(db: Session, system: System) -> System:
"""
Add an additional PrivacyDeclaration onto the base System to test scenarios with
multiple PrivacyDeclarations on a given system
"""
PrivacyDeclaration.create(
db=db,
data={
"name": "Collect data for third party sharing",
"system_id": system.id,
"data_categories": ["user.device.cookie_id"],
"data_use": "third_party_sharing",
"data_qualifier": "aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified",
"data_subjects": ["customer"],
"dataset_references": None,
"egress": None,
"ingress": None,
},
)

db.refresh(system)
return system


@pytest.fixture(scope="function")
def system_third_party_sharing(db: Session) -> System:
system_third_party_sharing = System.create(
Expand Down
Loading