Skip to content
This repository has been archived by the owner on Nov 30, 2022. It is now read-only.

Fixing inconsistent SaaS connector integration tests #473

2 changes: 1 addition & 1 deletion src/fidesops/task/filter_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def _defaultdict_or_array(resource: Any) -> Any:

elif isinstance(row, dict):
for key in row:
if key == target_path.levels[0]:
if target_path.levels and key == target_path.levels[0]:
if key not in saved:
saved[key] = _defaultdict_or_array(row[key])
saved[key] = select_and_save_field(
Expand Down
10 changes: 6 additions & 4 deletions tests/fixtures/saas/hubspot_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,11 @@ def hubspot_erasure_data(connection_config_hubspot, hubspot_erasure_identity_ema
# no need to subscribe contact, since creating a contact auto-subscribes them

# Allows contact to be propagated in Hubspot before calling access / erasure requests
remaining_tries = 5
retries = 10
while _contact_exists(hubspot_erasure_identity_email, connector) is False:
if remaining_tries < 1:
if not retries:
raise Exception(f"Contact with contact id {contact_id} could not be added to Hubspot")
retries -= 1
time.sleep(5)

yield contact_id
Expand All @@ -153,10 +154,11 @@ def hubspot_erasure_data(connection_config_hubspot, hubspot_erasure_identity_ema
connector.create_client().send(delete_request)

# verify contact is deleted
remaining_tries = 5
retries = 10
while _contact_exists(hubspot_erasure_identity_email, connector) is True:
if remaining_tries < 1:
if not retries:
raise Exception(f"Contact with contact id {contact_id} could not be deleted from Hubspot")
retries -= 1
time.sleep(5) # Ensures contact is deleted


Expand Down
142 changes: 136 additions & 6 deletions tests/fixtures/saas/segment_fixtures.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
import os
import random
import time
from typing import Any, Dict, Generator, Optional

import pydash
import pytest
import requests
from faker import Faker
from sqlalchemy.orm import Session

from fidesops.core.config import load_toml
from fidesops.db import session
from fidesops.models.connectionconfig import (
Expand All @@ -6,13 +17,8 @@
ConnectionType,
)
from fidesops.models.datasetconfig import DatasetConfig
import pytest
import pydash
import os
from typing import Any, Dict, Generator
from tests.fixtures.application_fixtures import load_dataset
from tests.fixtures.saas_example_fixtures import load_config
from sqlalchemy.orm import Session

saas_config = load_toml("saas_config.toml")

Expand All @@ -32,7 +38,7 @@ def segment_secrets():
}


@pytest.fixture(scope="function")
@pytest.fixture(scope="session")
def segment_identity_email():
return pydash.get(saas_config, "segment.identity_email") or os.environ.get(
"SEGMENT_IDENTITY_EMAIL"
Expand Down Expand Up @@ -89,3 +95,127 @@ def segment_dataset_config(
)
yield dataset
dataset.delete(db=db)


@pytest.fixture(scope="session")
def segment_erasure_identity_email(segment_identity_email) -> str:
    """Return a timestamped variant of the configured Segment identity email.

    The current Unix time (whole seconds) is inserted immediately before the
    first "@" of ``segment_identity_email``, e.g. ``user@example.com`` becomes
    ``user1650000000@example.com``.

    If the base address contains no "@", the timestamp is appended to the end.
    Slicing on the -1 returned by ``str.find`` would instead split off the
    last character of the address.
    """
    timestamp = int(time.time())
    # partition() yields an empty separator and tail when "@" is absent, so
    # the same expression handles both the normal and the malformed case.
    local_part, at_sign, domain = segment_identity_email.partition("@")
    return f"{local_part}{timestamp}{at_sign}{domain}"


def _get_user_id(email: str, secrets: Dict[str, Any]) -> Optional[str]:
    """Fetch the ``segment_id`` of the profile whose ``user_id`` is *email*.

    Issues a GET to the profile ``metadata`` endpoint on
    ``secrets["personas_domain"]`` for space ``secrets["namespace_id"]``,
    authenticating with ``secrets["access_secret"]`` as the basic-auth
    username.

    Returns:
        The ``segment_id`` value from the JSON response, or ``None`` when the
        response is not OK or carries no ``segment_id``. Callers poll on
        ``None``, so an incomplete payload leads to a retry, not a KeyError.
    """
    personas_domain = secrets["personas_domain"]
    namespace_id = secrets["namespace_id"]
    access_secret = secrets["access_secret"]
    response = requests.get(
        f"https://{personas_domain}/v1/spaces/{namespace_id}/collections/users/profiles/user_id:{email}/metadata",
        auth=(access_secret, None),
    )
    if not response.ok:
        return None

    return response.json().get("segment_id")


def _get_track_events(
    segment_id: str, secrets: Dict[str, Any]
) -> Optional[Dict[str, Any]]:
    """Fetch the first event recorded for the profile *segment_id*.

    Issues a GET to the profile ``events`` endpoint on
    ``secrets["personas_domain"]`` for space ``secrets["namespace_id"]``,
    authenticating with ``secrets["access_secret"]`` as the basic-auth
    username.

    Returns:
        The first element of the response's ``data`` list, or ``None`` when
        the response is not OK or ``data`` is missing, ``None`` or empty.
        Callers poll on ``None``, so "no events yet" must not raise.
    """
    personas_domain = secrets["personas_domain"]
    namespace_id = secrets["namespace_id"]
    access_secret = secrets["access_secret"]

    response = requests.get(
        f"https://{personas_domain}/v1/spaces/{namespace_id}/collections/users/profiles/{segment_id}/events",
        auth=(access_secret, None),
    )
    if not response.ok:
        return None

    # Parse the body once; an empty list is treated like "no data yet" so the
    # [0] lookup below can never raise IndexError.
    events = response.json().get("data")
    if not events:
        return None

    return events[0]


@pytest.fixture(scope="function")
def segment_erasure_data(
    segment_connection_config, segment_erasure_identity_email
) -> None:
    """Seed a Segment user and one track event for the erasure test.

    Posts an ``identify`` call and then a ``track`` call to
    ``secrets["api_domain"]`` for ``segment_erasure_identity_email``. After
    each call it polls the corresponding read endpoint until data is visible.

    The fixture provides no value; tests request it only for its side effects.

    Raises:
        Exception: if the user profile or the track event is still not
            readable after 10 retries spaced 5 seconds apart.
    """
    segment_secrets = segment_connection_config.secrets
    # Check the injected value. The bare name ``segment_identity_email`` would
    # resolve to the module-level fixture function, which is always truthy.
    if not segment_erasure_identity_email:  # Don't run unnecessarily locally
        return

    api_domain = segment_secrets["api_domain"]
    user_token = segment_secrets["user_token"]

    faker = Faker()

    timestamp = int(time.time())
    email = segment_erasure_identity_email
    first_name = faker.first_name()
    last_name = faker.last_name()

    # Create user
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Basic {user_token}",
    }
    body = {
        "userId": email,
        "traits": {
            "subscriptionStatus": "active",
            "address": {
                "city": faker.city(),
                "country": faker.country(),
                "postalCode": faker.postcode(),
                "state": "NY",
            },
            "age": random.randrange(18, 99),
            "avatar": "",
            "industry": "data",
            "description": faker.job(),
            "email": email,
            "firstName": first_name,
            "id": timestamp,
            "lastName": last_name,
            "name": f"{first_name} {last_name}",
            "phone": faker.phone_number(),
            "title": faker.prefix(),
            "username": f"test_fidesops_user_{timestamp}",
            "website": "www.example.com",
        },
    }
    response = requests.post(
        f"https://{api_domain}identify", headers=headers, json=body
    )
    assert response.ok

    # Wait until user returns data
    retries = 10
    while (segment_id := _get_user_id(email, segment_secrets)) is None:
        if not retries:
            raise Exception(
                "The user endpoint did not return the required data for testing during the time limit"
            )
        retries -= 1
        time.sleep(5)

    # Create event
    body = {
        "userId": email,
        "type": "track",
        "event": "User Registered",
        "properties": {"plan": "Free", "accountType": faker.company()},
        "context": {"ip": faker.ipv4()},
    }

    response = requests.post(f"https://{api_domain}track", headers=headers, json=body)
    assert response.ok

    # Wait until track_events returns data
    retries = 10
    while _get_track_events(segment_id, segment_secrets) is None:
        if not retries:
            raise Exception(
                "The track_events endpoint did not return the required data for testing during the time limit"
            )
        retries -= 1
        time.sleep(5)
79 changes: 4 additions & 75 deletions tests/integration_tests/saas/test_segment_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from fidesops.task import graph_task
from fidesops.task.graph_task import get_cached_data_for_erasures
from tests.fixtures.saas.segment_fixtures import segment_erasure_data
from tests.graph.graph_test_util import assert_rows_match


Expand Down Expand Up @@ -140,93 +141,21 @@ def test_segment_saas_access_request_task(
assert filtered_results[f"{dataset_name}:segment_user"][0]["segment_id"]


def _create_test_segment_email(base_email: str, timestamp: int) -> str:
at_index: int = base_email.find("@")
email = f"{base_email[0:at_index]}{timestamp}{base_email[at_index:]}"
return email


def create_segment_test_data(
    segment_connection_config, segment_identity_email: str
):
    """Create a Segment user plus one track event and return the user's email.

    The email is derived from ``segment_identity_email`` and the current Unix
    time. When ``segment_identity_email`` is falsy nothing is sent to Segment
    and ``None`` is returned, so the helper is a no-op where it is not
    configured. Both POST requests must answer with HTTP 200.
    """
    secrets = segment_connection_config.secrets
    if not segment_identity_email:  # Don't run unnecessarily locally
        return None

    fake = Faker()

    now = int(time.time())
    test_email = _create_test_segment_email(segment_identity_email, now)
    given_name = fake.first_name()
    family_name = fake.last_name()

    # Shared by the identify and track requests below.
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Basic {secrets['user_token']}",
    }

    # Step 1: register the user via the identify call.
    postal_address = {
        "city": fake.city(),
        "country": fake.country(),
        "postalCode": fake.postcode(),
        "state": "NY",
    }
    user_traits = {
        "subscriptionStatus": "active",
        "address": postal_address,
        "age": random.randrange(18, 99),
        "avatar": "",
        "industry": "data",
        "description": fake.job(),
        "email": test_email,
        "firstName": given_name,
        "id": now,
        "lastName": family_name,
        "name": f"{given_name} {family_name}",
        "phone": fake.phone_number(),
        "title": fake.prefix(),
        "username": f"test_fidesops_user_{now}",
        "website": "www.example.com",
    }
    identify_payload = {"userId": test_email, "traits": user_traits}
    identify_url = f"https://{secrets['api_domain']}identify"
    identify_response = requests.post(
        identify_url, headers=request_headers, json=identify_payload
    )
    assert identify_response.status_code == 200

    # Step 2: record a single track event for that user.
    track_payload = {
        "userId": test_email,
        "type": "track",
        "event": "User Registered",
        "properties": {"plan": "Free", "accountType": fake.company()},
        "context": {"ip": fake.ipv4()},
    }
    track_url = f"https://{secrets['api_domain']}track"
    track_response = requests.post(
        track_url, headers=request_headers, json=track_payload
    )
    assert track_response.status_code == 200
    return test_email


@pytest.mark.integration_saas
@pytest.mark.integration_segment
def test_segment_saas_erasure_request_task(
db,
policy,
segment_connection_config,
segment_dataset_config,
segment_identity_email,
segment_erasure_identity_email,
segment_erasure_data
) -> None:
"""Full erasure request based on the Segment SaaS config"""
config.execution.MASKING_STRICT = False # Allow GDPR Delete

# Create user for GDPR delete
erasure_email = create_segment_test_data(segment_connection_config, segment_identity_email)
time.sleep(8) # Pause before making access/erasure requests
erasure_email = segment_erasure_identity_email
privacy_request = PrivacyRequest(
id=f"test_saas_access_request_task_{random.randint(0, 1000)}"
)
Expand Down
44 changes: 37 additions & 7 deletions tests/integration_tests/saas/test_sentry_task.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import requests
import random
import time
from typing import Any, Dict, List, Optional

from fidesops.task.filter_results import filter_data_categories
import pytest
import random
import requests

from fidesops.graph.graph import DatasetGraph
from fidesops.models.privacy_request import PrivacyRequest
from fidesops.schemas.redis_cache import PrivacyRequestIdentity

from fidesops.task import graph_task
from fidesops.task.filter_results import filter_data_categories
from fidesops.task.graph_task import get_cached_data_for_erasures
from tests.graph.graph_test_util import assert_rows_match

Expand Down Expand Up @@ -205,22 +206,51 @@ def test_sentry_access_request_task(
)


def _get_issues(
    project: Dict[str, Any],
    secrets: Dict[str, Any],
    headers: Dict[str, Any],
) -> Optional[List[Dict[str, Any]]]:
    """Fetch the issues of a Sentry project, or ``None`` if none are available.

    Issues a GET to ``/api/0/projects/{organization slug}/{project slug}/issues/``
    on ``secrets["host"]`` using the supplied *headers*.

    Returns:
        The decoded JSON body when the response is OK and non-empty,
        otherwise ``None``. Callers poll on ``None``, so an error response is
        reported as ``None`` without trying to decode its body.
    """
    response = requests.get(
        f"https://{secrets['host']}/api/0/projects/{project['organization']['slug']}/{project['slug']}/issues/",
        headers=headers,
    )
    # Check the status before decoding: an error response may not carry a
    # JSON body, and decoding it would raise instead of signalling a retry.
    if not response.ok:
        return None

    issues = response.json()
    return issues or None


def sentry_erasure_test_prep(sentry_connection_config, db):
sentry_secrets = sentry_connection_config.secrets
# Set the assignedTo field on a sentry issue to a given employee
token = sentry_secrets.get("erasure_access_token")
issue_url = sentry_secrets.get("issue_url")
sentry_user_id = sentry_secrets.get("user_id_erasure")
host = sentry_secrets.get("host")

if not token or not issue_url or not sentry_user_id:
# Exit early if these haven't been set locally
return None, None, None

headers = {"Authorization": f"Bearer {token}"}
data = {"assignedTo": f"user:{sentry_user_id}"}
resp = requests.put(issue_url, json=data, headers=headers)
assert resp.status_code == 200
assert resp.json()["assignedTo"]["id"] == sentry_user_id
response = requests.put(issue_url, json=data, headers=headers)
assert response.ok
assert response.json().get("assignedTo", {}).get("id") == sentry_user_id

# Get projects
response = requests.get(f"https://{host}/api/0/projects/", headers=headers)
assert response.ok
project = response.json()[0]

# Wait until issues returns data
retries = 10
while _get_issues(project, sentry_secrets, headers) is None:
if not retries:
raise Exception(
"The issues endpoint did not return the required data for testing during the time limit"
)
retries -= 1
time.sleep(5)

# Temporarily sets the access token to one that works for erasures
sentry_connection_config.secrets["access_token"] = sentry_secrets[
Expand Down
Loading