diff --git a/.dockerignore b/.dockerignore index 7f4d222715f..d071ecf439d 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,11 +3,31 @@ build/ dist/ src/fidesctl.egg-info/ +# Ignore Python-Specific Files +.mypy_cache/ +.nox/ +.pytest_cache/ +__pycache__/ +.coverage + +# pyenv +.python-version + +# Environments +.env +.venv +env/ +venv/ + +# Editors +.vscode/ +.idea/ + # Ignore the docs docs/ # Ignore dev files +.git/ .github/ .devcontainer/ - node_modules/ diff --git a/.fides/dataset.yml b/.fides/dataset.yml index af0046f694a..dd2676ecd3b 100644 --- a/.fides/dataset.yml +++ b/.fides/dataset.yml @@ -1,508 +1,869 @@ dataset: - - fides_key: public - organization_fides_key: default_organization - name: public - description: "The dataset responsible for storing all of the data related to a fidesctl instance." - meta: {} - data_categories: - - system.operations - retention: "1 year post employment" - collections: - - name: alembic_version - fields: - - name: version_num - data_categories: - - system.operations - - - name: data_categories - fields: - - name: description - data_categories: - - system.operations - - - name: fides_key - data_categories: - - system.operations - - - name: id - data_categories: - - system.operations - - - name: name - data_categories: - - system.operations - - - name: organization_fides_key - data_categories: - - system.operations - - - name: parent_key - data_categories: - - system.operations - - - name: created_at - data_categories: - - system.operations - - - name: updated_at - data_categories: - - system.operations - - - name: data_qualifiers - fields: - - name: description - data_categories: - - system.operations - - - name: fides_key - data_categories: - - system.operations - - - name: id - data_categories: - - system.operations - - - name: name - data_categories: - - system.operations - - - name: organization_fides_key - data_categories: - - system.operations - - - name: parent_key - data_categories: - - system.operations - - - name: 
created_at - data_categories: - - system.operations - - - name: updated_at - data_categories: - - system.operations - - - name: data_subjects - fields: - - name: description - data_categories: - - system.operations - - - name: fides_key - data_categories: - - system.operations - - - name: id - data_categories: - - system.operations - - - name: name - data_categories: - - system.operations - - - name: organization_fides_key - data_categories: - - system.operations - - - name: created_at - data_categories: - - system.operations - - - name: updated_at - data_categories: - - system.operations - - - name: rights - description: "JSON structure containing a strategy and optional values for detailing data subject rights available" - data_categories: - - system.operations - - - name: automated_decisions_or_profiling - description: "Boolean value representing if automated decisions or profiling is used for the data subject." - data_categories: - - system.operations - - - name: data_uses - fields: - - name: description - data_categories: - - system.operations - - - name: fides_key - data_categories: - - system.operations - - - name: legal_basis - data_categories: - - system.operations - - - name: recipients - data_categories: - - system.operations - - - name: special_category - data_categories: - - system.operations - - - name: id - data_categories: - - system.operations - - - name: name - data_categories: - - system.operations - - - name: organization_fides_key - data_categories: - - system.operations - - - name: legitimate_interest - description: "Boolean value denoting whether or not the data use is marked as a legitimate interest" - data_categories: - - system.operations - - - name: legitimate_interest_impact_assessment - description: "A url pointing to a legitimate interest impact assessment" - data_categories: - - system.operations - - - name: parent_key - data_categories: - - system.operations - - - name: created_at - data_categories: - - system.operations - - - name: 
updated_at - data_categories: - - system.operations - - - name: datasets - fields: - - name: collections - data_categories: - - system.operations - - - name: legal_basis - description: "The legal basis for processing personal data as defined by Article 6 of the GDPR" - data_categories: - - system.operations - - - name: fidesctl_meta - description: "Metadata specifically for the fidesctl application" - data_categories: - - system.operations - - - name: special_category - description: "The special category as defined by Article 9 of the GDPR" - data_categories: - - system.operations - - - name: recipients - description: "An array of recipients of the intended data use." - data_categories: - - system.operations - - - name: data_categories - data_categories: - - system.operations - - - name: data_qualifier - data_categories: - - system.operations - - - name: third_country_transfers - data_categories: - - system.operations - - - name: description - data_categories: - - system.operations - - - name: fides_key - data_categories: - - system.operations - - - name: id - data_categories: - - system.operations - - - name: meta - data_categories: - - system.operations - - - name: name - data_categories: - - system.operations - - - name: organization_fides_key - data_categories: - - system.operations - - - name: joint_controller - description: "Encrypted contact information for a joint controller (name, address, email, phone)" - data_categories: - - user.provided.identifiable.contact - retention: "End of joint controller agreement." - - - name: retention - description: "A string representing how long the dataset is retained for. Can also be found and applied as a property within Collections and Fields." 
- data_categories: - - system.operations - - - name: created_at - description: "The timestamp of when the row was created" - data_categories: - - system.operations - - - name: updated_at - description: "The timestamp of when the row was last updated" - data_categories: - - system.operations - - - name: evaluations - fields: - - name: details - data_categories: - - system.operations - - - name: fides_key - data_categories: - - system.operations - - - name: id - data_categories: - - system.operations - - - name: message - data_categories: - - system.operations - - - name: status - data_categories: - - system.operations - - - name: violations - data_categories: - - system.operations - - - name: created_at - description: "The timestamp of when the row was created" - data_categories: - - system.operations - - - name: updated_at - description: "The timestamp of when the row was last updated" - data_categories: - - system.operations - - - name: organizations - fields: - - name: description - data_categories: - - system.operations - - - name: fides_key - data_categories: - - system.operations - - - name: fidesctl_meta - data_categories: - - system.operations - - - name: id - data_categories: - - system.operations - - - name: name - data_categories: - - system.operations - - - name: organization_fides_key - data_categories: - - system.operations - - - name: organization_parent_key - data_categories: - - system.operations - - - name: controller - description: "Encrypted contact information for the controller (name, address, email, phone)" - data_categories: - - user.provided.identifiable.contact - retention: "1 Year post-employment" - - - name: data_protection_officer - description: "Encrypted contact information for the Data Protection Officer (name, address, email, phone)" - data_categories: - - user.provided.identifiable.contact - retention: "1 Year post-employment" - - - name: representative - description: "Encrypted contact information for the representative (name, 
address, email, phone)" - data_categories: - - user.provided.identifiable.contact - retention: "1 Year post-employment" - - - name: security_policy - description: "A link to the Ethyca security policy" - data_categories: - - system.operations - - - name: created_at - description: "The timestamp of when the row was created" - data_categories: - - system.operations - - - name: updated_at - description: "The timestamp of when the row was last updated" - data_categories: - - system.operations - - - name: policies - fields: - - name: description - data_categories: - - system.operations - - - name: fides_key - data_categories: - - system.operations - - - name: id - data_categories: - - system.operations - - - name: name - data_categories: - - system.operations - - - name: organization_fides_key - data_categories: - - system.operations - - - name: rules - data_categories: - - system.operations - - - name: created_at - description: "The timestamp of when the row was created" - data_categories: - - system.operations - - - name: updated_at - description: "The timestamp of when the row was last updated" - data_categories: - - system.operations - - - name: registries - fields: - - name: description - data_categories: - - system.operations - - - name: fides_key - data_categories: - - system.operations - - - name: id - data_categories: - - system.operations - - - name: name - data_categories: - - system.operations - - - name: organization_fides_key - data_categories: - - system.operations - - - name: created_at - description: "The timestamp of when the row was created" - data_categories: - - system.operations - - - name: updated_at - description: "The timestamp of when the row was last updated" - data_categories: - - system.operations - - - name: systems - fields: - - name: description - data_categories: - - system.operations - - - name: third_country_transfers - data_categories: - - system.operations - - - name: administrating_department - data_categories: - - system.operations 
- - - name: fidesctl_meta - data_categories: - - system.operations - - - name: data_responsibility_title - data_categories: - - system.operations - - - name: fides_key - data_categories: - - system.operations - - - name: id - data_categories: - - system.operations - - - name: meta - data_categories: - - system.operations - - - name: name - data_categories: - - system.operations - - - name: organization_fides_key - data_categories: - - system.operations - - - name: privacy_declarations - data_categories: - - system.operations - - - name: registry_id - data_categories: - - system.operations - - - name: system_dependencies - data_categories: - - system.operations - - - name: system_type - data_categories: - - system.operations - - - name: joint_controller - description: "Encrypted contact information for a joint controller (name, address, email, phone)" - data_categories: - - user.provided.identifiable.contact - retention: "End of joint controller agreement." - - - name: data_protection_impact_assessment - description: "Properties identifying if a DPIA is required, the status of it, and a link if applicable." - data_categories: - - system.operations - - - name: created_at - description: "The timestamp of when the row was created" - data_categories: - - system.operations - - - name: updated_at - description: "The timestamp of when the row was last updated" - data_categories: - - system.operations +- fides_key: public + organization_fides_key: default_organization + name: public + description: The dataset responsible for storing all of the data related to a fidesctl + instance. 
+ meta: {} + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + fidesctl_meta: null + joint_controller: null + retention: 1 year post employment + third_country_transfers: null + collections: + - name: alembic_version + description: null + data_categories: null + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: + - name: version_num + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: data_categories + description: null + data_categories: null + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: + - name: created_at + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: description + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: id + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: name + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: organization_fides_key + description: null + data_categories: + - system.operations + data_qualifier: 
aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: parent_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: updated_at + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: data_qualifiers + description: null + data_categories: null + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: + - name: created_at + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: description + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: id + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: name + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: organization_fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: parent_key + description: null + data_categories: + - system.operations + 
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: updated_at + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: data_subjects + description: null + data_categories: null + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: + - name: automated_decisions_or_profiling + description: Boolean value representing if automated decisions or profiling + is used for the data subject. + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: created_at + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: description + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: id + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: name + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: organization_fides_key + description: null + data_categories: + - system.operations + data_qualifier: 
aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: rights + description: JSON structure containing a strategy and optional values for detailing + data subject rights available + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: updated_at + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: data_uses + description: null + data_categories: null + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: + - name: created_at + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: description + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: id + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: legal_basis + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: legitimate_interest + description: Boolean value denoting whether or not the data use is marked as + a legitimate interest + data_categories: + - system.operations + data_qualifier: 
aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: legitimate_interest_impact_assessment + description: A url pointing to a legitimate interest impact assessment + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: name + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: organization_fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: parent_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: recipients + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: special_category + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: updated_at + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: datasets + description: null + data_categories: null + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: + - name: collections + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null 
+ - name: created_at + description: The timestamp of when the row was created + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: data_categories + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: data_qualifier + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: description + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: fidesctl_meta + description: Metadata specifically for the fidesctl application + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: id + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: joint_controller + description: Encrypted contact information for a joint controller (name, address, + email, phone) + data_categories: + - user.provided.identifiable.contact + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: End of joint controller agreement. 
+ fields: null + - name: legal_basis + description: The legal basis for processing personal data as defined by Article + 6 of the GDPR + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: meta + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: name + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: organization_fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: recipients + description: An array of recipients of the intended data use. + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: retention + description: A string representing how long the dataset is retained for. Can + also be found and applied as a property within Collections and Fields. 
+ data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: special_category + description: The special category as defined by Article 9 of the GDPR + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: third_country_transfers + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: updated_at + description: The timestamp of when the row was last updated + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: evaluations + description: null + data_categories: null + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: + - name: created_at + description: The timestamp of when the row was created + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: details + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: id + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: message + description: null + data_categories: + - system.operations + data_qualifier: 
aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: status + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: updated_at + description: The timestamp of when the row was last updated + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: violations + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: organizations + description: null + data_categories: null + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: + - name: controller + description: Encrypted contact information for the controller (name, address, + email, phone) + data_categories: + - user.provided.identifiable.contact + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: 1 Year post-employment + fields: null + - name: created_at + description: The timestamp of when the row was created + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: data_protection_officer + description: Encrypted contact information for the Data Protection Officer (name, + address, email, phone) + data_categories: + - user.provided.identifiable.contact + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: 1 Year post-employment + fields: null + - name: description + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified 
+ retention: null + fields: null + - name: fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: fidesctl_meta + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: id + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: name + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: organization_fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: organization_parent_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: representative + description: Encrypted contact information for the representative (name, address, + email, phone) + data_categories: + - user.provided.identifiable.contact + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: 1 Year post-employment + fields: null + - name: security_policy + description: A link to the Ethyca security policy + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: updated_at + description: The timestamp of when the row was last updated + data_categories: + - system.operations + data_qualifier: 
aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: policies + description: null + data_categories: null + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: + - name: created_at + description: The timestamp of when the row was created + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: description + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: id + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: name + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: organization_fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: rules + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: updated_at + description: The timestamp of when the row was last updated + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: registries 
+ description: null + data_categories: null + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: + - name: created_at + description: The timestamp of when the row was created + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: description + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: id + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: name + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: organization_fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: updated_at + description: The timestamp of when the row was last updated + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: systems + description: null + data_categories: null + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: + - name: administrating_department + description: null + data_categories: + - system.operations + data_qualifier: 
aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: created_at + description: The timestamp of when the row was created + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: data_protection_impact_assessment + description: Properties identifying if a DPIA is required, the status of it, + and a link if applicable. + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: data_responsibility_title + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: description + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: fidesctl_meta + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: id + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: joint_controller + description: Encrypted contact information for a joint controller (name, address, + email, phone) + data_categories: + - user.provided.identifiable.contact + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: End of joint controller 
agreement. + fields: null + - name: meta + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: name + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: organization_fides_key + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: privacy_declarations + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: registry_id + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: system_dependencies + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: system_type + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: third_country_transfers + description: null + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null + - name: updated_at + description: The timestamp of when the row was last updated + data_categories: + - system.operations + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + retention: null + fields: null diff --git a/.fides/policy.yml b/.fides/policy.yml index 
befa6bc5be7..529ca7b0e5c 100644 --- a/.fides/policy.yml +++ b/.fides/policy.yml @@ -1,41 +1,40 @@ policy: - - fides_key: fidesctl_policy - name: Fidesctl Policy - description: The main privacy policy for Fidesctl. - rules: - - name: reject_non_system_data - description: Disallow any non-system data or uses. - data_categories: - matches: OTHER - values: - - system.operations - data_uses: - matches: OTHER - values: - - provide - data_subjects: - matches: OTHER - values: - - anonymous_user - data_qualifier: aggregated - - fides_key: data_sharing_policy - name: Data Sharing - description: The privacy policy that governs sharing of data with third parties. - rules: - - name: Disallow Third-Party Marketing - description: Disallow collecting any user contact info to use for marketing. - data_categories: - matches: ANY # If any of these data categories are being used - values: - - account - - user - data_uses: - matches: ANY # And the use of the data is for third-party sharing - values: - - third_party_sharing - data_subjects: - matches: ANY # And the data subject is a customer - values: - - customer - # And the data is identifiable, trigger a violation - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified +- fides_key: fidesctl_policy + organization_fides_key: default_organization + name: Fidesctl Policy + description: The main privacy policy for Fidesctl. + rules: + - name: reject_non_system_data + data_categories: + matches: OTHER + values: + - system.operations + data_uses: + matches: OTHER + values: + - provide + data_subjects: + matches: OTHER + values: + - anonymous_user + data_qualifier: aggregated +- fides_key: data_sharing_policy + organization_fides_key: default_organization + name: Data Sharing + description: The privacy policy that governs sharing of data with third parties. 
+ rules: + - name: Disallow Third-Party Marketing + data_categories: + matches: ANY + values: + - account + - user + data_uses: + matches: ANY + values: + - third_party_sharing + data_subjects: + matches: ANY + values: + - customer + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/.fides/system.yml b/.fides/system.yml index 7d416ed752b..fee9d08c441 100644 --- a/.fides/system.yml +++ b/.fides/system.yml @@ -1,16 +1,29 @@ system: - - fides_key: fidesctl_system - name: Fidesctl System - description: Software that functionally applies Fides. - system_type: Service - privacy_declarations: - - name: Store system data. - data_categories: - - system.operations - - user.provided.identifiable.contact - data_use: improve.system - data_subjects: - - anonymous_user - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified - dataset_references: - - public +- fides_key: fidesctl_system + organization_fides_key: default_organization + name: Fidesctl System + description: Software that functionally applies Fides. + registry_id: null + meta: null + fidesctl_meta: null + system_type: Service + data_responsibility_title: Controller + privacy_declarations: + - name: Store system data. 
+ data_categories: + - system.operations + - user.provided.identifiable.contact + data_use: improve.system + data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified + data_subjects: + - anonymous_user + dataset_references: + - public + system_dependencies: null + joint_controller: null + third_country_transfers: null + administrating_department: Not defined + data_protection_impact_assessment: + is_required: false + progress: null + link: null diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 285e6ed585c..a5a2546085f 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -2,12 +2,15 @@ name: Docker Build & Push on: push: + branches: + - main tags: - "*" env: DOCKER_USER: ethycaci DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} + TAG: ${{ github.event.release.tag_name }} jobs: push-fidesctl: @@ -26,8 +29,12 @@ jobs: - name: Install Dev Requirements run: pip install -r dev-requirements.txt - - name: Build Fidesctl + - name: Build Fidesctl Image run: nox -s "build(prod)" - - name: Push Fidesctl - run: nox -s push + - name: Push Fidesctl Dev Tag + run: nox -s "push(dev)" + + - name: Push Fidesctl Prod Tags + if: ${{ env.TAG }} + run: nox -s "push(prod)" diff --git a/CHANGELOG.md b/CHANGELOG.md index c95e601872e..d2d9db80466 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,8 @@ The types of changes are: * Add tooltips for help on forms * Okta, aws and database credentials can now come from `fidesctl.toml` config [#694](https://github.com/ethyca/fides/pull/694) * New `validate` endpoint to test aws and okta credentials [#722](https://github.com/ethyca/fides/pull/722) +* A new image tagged `ethyca/fidesctl:dev` is published on each push to `main` [#781](https://github.com/ethyca/fides/pull/781) +* A new cli command (`fidesctl sync`) [#765](https://github.com/ethyca/fides/pull/765) ### Changed diff --git a/noxfiles/constants_nox.py b/noxfiles/constants_nox.py index 
d72aa03367d..b70beabe617 100644 --- a/noxfiles/constants_nox.py +++ b/noxfiles/constants_nox.py @@ -24,6 +24,7 @@ def get_current_tag() -> str: IMAGE = f"{REGISTRY}/{IMAGE_NAME}" IMAGE_LOCAL = f"{IMAGE}:local" IMAGE_LOCAL_UI = f"{IMAGE}:local-ui" +IMAGE_DEV = f"{IMAGE}:dev" IMAGE_LATEST = f"{IMAGE}:latest" # Disable TTY to perserve output within Github Actions logs diff --git a/noxfiles/docker_nox.py b/noxfiles/docker_nox.py index 4312499ff20..4fe9436bc4a 100644 --- a/noxfiles/docker_nox.py +++ b/noxfiles/docker_nox.py @@ -2,6 +2,7 @@ import nox from constants_nox import ( IMAGE, + IMAGE_DEV, IMAGE_LATEST, IMAGE_LOCAL, IMAGE_LOCAL_UI, @@ -49,8 +50,23 @@ def build(session: nox.Session, image: str) -> None: @nox.session() -def push(session: nox.Session) -> None: +@nox.parametrize( + "tag", + [ + nox.param("prod", id="prod"), + nox.param("dev", id="dev"), + ], +) +def push(session: nox.Session, tag: str) -> None: """Push the fidesctl Docker image to Dockerhub.""" - session.run("docker", "tag", get_current_image(), IMAGE_LATEST, external=True) - session.run("docker", "push", IMAGE, external=True) - session.run("docker", "push", IMAGE_LATEST, external=True) + + tag_matrix = {"prod": IMAGE_LATEST, "dev": IMAGE_DEV} + + # Push either "ethyca/fidesctl:dev" or "ethyca/fidesctl:latest" + session.run("docker", "tag", get_current_image(), tag_matrix[tag], external=True) + session.run("docker", "push", tag_matrix[tag], external=True) + + # Only push the tagged version if its for prod + # Example: "ethyca/fidesctl:1.7.0" + if tag == "prod": + session.run("docker", "push", IMAGE, external=True) diff --git a/src/fidesctl/cli/__init__.py b/src/fidesctl/cli/__init__.py index c8aedcaf4b1..0fe4b8eed0c 100644 --- a/src/fidesctl/cli/__init__.py +++ b/src/fidesctl/cli/__init__.py @@ -11,7 +11,7 @@ from fidesctl.core.config import get_config from .commands.annotate import annotate -from .commands.core import apply, evaluate, parse +from .commands.core import apply, evaluate, parse, sync 
from .commands.crud import delete, get, ls from .commands.db import database from .commands.export import export @@ -33,6 +33,7 @@ ls, scan, status, + sync, ] ALL_COMMANDS = API_COMMANDS + LOCAL_COMMANDS SERVER_CHECK_COMMAND_NAMES = [ diff --git a/src/fidesctl/cli/commands/core.py b/src/fidesctl/cli/commands/core.py index 44765813e2c..4911b05f0f4 100644 --- a/src/fidesctl/cli/commands/core.py +++ b/src/fidesctl/cli/commands/core.py @@ -12,6 +12,7 @@ from fidesctl.core import audit as _audit from fidesctl.core import evaluate as _evaluate from fidesctl.core import parse as _parse +from fidesctl.core import sync as _sync @click.command() @@ -131,3 +132,22 @@ def parse(ctx: click.Context, manifests_dir: str, verbose: bool = False) -> None taxonomy = _parse.parse(manifests_dir=manifests_dir) if verbose: pretty_echo(taxonomy.dict(), color="green") + + +@click.command() +@click.pass_context +@manifests_dir_argument +@with_analytics +def sync(ctx: click.Context, manifests_dir: str) -> None: + """ + Update local resource files by their fides_key to match their server versions. + """ + + config = ctx.obj["CONFIG"] + # Do this to validate the manifests since they won't get parsed during the sync process + _parse.parse(manifests_dir) + _sync.sync( + url=config.cli.server_url, + manifests_dir=manifests_dir, + headers=config.user.request_headers, + ) diff --git a/src/fidesctl/core/api_helpers.py b/src/fidesctl/core/api_helpers.py index e13291cda92..e4c47be96f6 100644 --- a/src/fidesctl/core/api_helpers.py +++ b/src/fidesctl/core/api_helpers.py @@ -2,7 +2,7 @@ Reusable utilities meant to make repetitive api-related tasks easier. 
""" -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union from fideslang import FidesModel from fideslang.parse import parse_dict @@ -46,7 +46,8 @@ def get_server_resource( resource_type: str, resource_key: str, headers: Dict[str, str], -) -> Optional[FidesModel]: + raw: bool = False, +) -> Optional[Union[FidesModel, Dict]]: """ Attempt to get a given resource from the server. @@ -61,15 +62,17 @@ def get_server_resource( ) server_resource: Optional[FidesModel] = ( - parse_dict( - resource_type=resource_type, - resource=raw_server_response.json(), - from_server=True, - ) + raw_server_response.json() if raw_server_response.status_code >= 200 and raw_server_response.status_code <= 299 else None ) + if not raw and server_resource: + server_resource = parse_dict( + resource_type=resource_type, + resource=raw_server_response.json(), + from_server=True, + ) return server_resource diff --git a/src/fidesctl/core/sync.py b/src/fidesctl/core/sync.py new file mode 100644 index 00000000000..98563141158 --- /dev/null +++ b/src/fidesctl/core/sync.py @@ -0,0 +1,51 @@ +"""This module handles the logic for syncing remote resource versions into their local file.""" +from typing import Dict + +import yaml +from fideslang.manifests import load_yaml_into_dict + +from fidesctl.cli.utils import echo_green, print_divider +from fidesctl.core.api_helpers import get_server_resource +from fidesctl.core.utils import get_manifest_list + + +def sync(manifests_dir: str, url: str, headers: Dict[str, str]) -> None: + """ + If a resource in a local file has a matching resource on the server, + write out the server version into the local file. 
+ """ + + manifest_path_list = get_manifest_list(manifests_dir) + + print_divider() + for manifest_path in manifest_path_list: + print(f"Syncing file: '{manifest_path}'...") + manifest = load_yaml_into_dict(manifest_path) + updated_manifest = {} + + for resource_type in manifest.keys(): + resource_list = manifest[resource_type] + updated_resource_list = [] + + for resource in resource_list: + fides_key = resource["fides_key"] + server_resource = get_server_resource( + url, resource_type, fides_key, headers, raw=True + ) + + if server_resource: + updated_resource_list.append(server_resource) + print( + f" - {resource_type.capitalize()} with fides_key: {fides_key} is being updated from the server..." + ) + else: + updated_resource_list.append(resource) + + updated_manifest[resource_type] = updated_resource_list + + with open(manifest_path, "w") as manifest_file: + yaml.dump(updated_manifest, manifest_file, sort_keys=False, indent=2) + echo_green(f"Updated manifest file written out to: '{manifest_path}'") + print_divider() + + echo_green("Sync complete.") diff --git a/src/fidesctl/core/utils.py b/src/fidesctl/core/utils.py index 02bf9f2e77f..21c35908cb9 100644 --- a/src/fidesctl/core/utils.py +++ b/src/fidesctl/core/utils.py @@ -1,9 +1,10 @@ """Utils to help with API calls.""" +import glob import logging import re from functools import partial from json.decoder import JSONDecodeError -from typing import Dict, Iterator +from typing import Dict, Iterator, List import click import jwt @@ -87,6 +88,17 @@ def get_all_level_fields(fields: list) -> Iterator[DatasetField]: yield nested_field +def get_manifest_list(manifests_dir: str) -> List[str]: + """Get a list of manifest files from the manifest directory.""" + + yml_endings = ["yml", "yaml"] + manifest_list = [] + for yml_ending in yml_endings: + manifest_list += glob.glob(f"{manifests_dir}/**/*.{yml_ending}", recursive=True) + + return manifest_list + + def check_fides_key(proposed_fides_key: str) -> str: """ A helper 
function to automatically sanitize diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 09d232547c5..75ef7e8b003 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -1,5 +1,7 @@ # pylint: disable=missing-docstring, redefined-outer-name import os +from pathlib import PosixPath +from shutil import copytree from typing import Generator import pytest @@ -96,6 +98,18 @@ def test_dry_diff_apply(test_config_path: str, test_cli_runner: CliRunner) -> No assert result.exit_code == 0 +@pytest.mark.integration +def test_sync( + test_config_path: str, test_cli_runner: CliRunner, tmp_path: PosixPath +) -> None: + copytree("demo_resources", tmp_path, dirs_exist_ok=True) + result = test_cli_runner.invoke( + cli, ["-f", test_config_path, "sync", str(tmp_path)] + ) + print(result.output) + assert result.exit_code == 0 + + @pytest.mark.integration def test_audit(test_config_path: str, test_cli_runner: CliRunner) -> None: result = test_cli_runner.invoke(cli, ["-f", test_config_path, "evaluate", "-a"]) diff --git a/tests/core/test_sync.py b/tests/core/test_sync.py new file mode 100644 index 00000000000..73979aab247 --- /dev/null +++ b/tests/core/test_sync.py @@ -0,0 +1,3 @@ +def test_sync() -> None: + """Placeholder test.""" + assert 1 diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index 06460c917fd..3934897519d 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -1,4 +1,5 @@ # pylint: disable=missing-docstring, redefined-outer-name +from pathlib import PosixPath from typing import Generator import pytest @@ -59,6 +60,21 @@ def test_nested_fields_unpacked( @pytest.mark.unit +def test_get_manifest_list(tmp_path: PosixPath) -> None: + """Test that the correct number of yml files are returned.""" + test_dir = tmp_path / "test" + test_dir.mkdir() + test_files = ["foo.yml", "foo.yaml"] + + for file in test_files: + test_file = test_dir / file + print(test_file) + test_file.write_text("content") + + manifest_list = 
utils.get_manifest_list(str(test_dir)) + assert len(manifest_list) == 2 + + @pytest.mark.parametrize( "fides_key, sanitized_fides_key", [("foo", "foo"), ("@foo#", "_foo_"), (":_foo)bar!123$", "__foo_bar_123_")],