From 6891adea9e5253e0f700a23ea99246d274ee0123 Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 7 Sep 2023 23:49:29 +0800 Subject: [PATCH 01/27] Bump Pydantic to version 2 --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 2b0dd342..a82333e1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -4,7 +4,7 @@ nox.options.reuse_existing_virtualenvs = True TESTED_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"] -TESTED_PYDANTIC_VERSIONS = ["1.8.2", "1.9.2", "1.10.9"] +TESTED_PYDANTIC_VERSIONS = ["1.8.2", "1.9.2", "1.10.9", "2.3.0"] TESTED_PYYAML_VERSIONS = ["5.4.1", "6.0"] From c4917580f00f68d0f07284abe1a889ae7bd439dd Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 8 Sep 2023 10:45:18 +0800 Subject: [PATCH 02/27] Add Pydantic 2 to Action Matrix --- .github/workflows/pr_checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index d6719934..4ede4bfd 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -94,7 +94,7 @@ jobs: strategy: matrix: python_version: ["3.8", "3.9", "3.10", "3.11"] - pydantic_version: ["1.8.2", "1.9.2", "1.10.9"] + pydantic_version: ["1.8.2", "1.9.2", "1.10.9", "2.3.0"] pyyaml_version: ["5.4.1", "6.0"] runs-on: ubuntu-latest continue-on-error: true From 153bf2dfe83a83128ec7a6e6ce8d5cb55bb38f28 Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 10 Nov 2023 16:00:11 +0800 Subject: [PATCH 03/27] checkin --- noxfile.py | 2 +- requirements.txt | 2 +- src/fideslang/__init__.py | 2 +- src/fideslang/default_taxonomy/utils.py | 2 +- src/fideslang/models.py | 303 +++++++++++------------- src/fideslang/relationships.py | 2 +- src/fideslang/utils.py | 2 +- src/fideslang/validation.py | 61 +++-- tests/fideslang/test_validation.py | 11 +- 9 files changed, 189 insertions(+), 198 deletions(-) diff --git a/noxfile.py b/noxfile.py index a82333e1..d350324e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -4,7 +4,7 @@ nox.options.reuse_existing_virtualenvs = True TESTED_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"] -TESTED_PYDANTIC_VERSIONS = ["1.8.2", "1.9.2", "1.10.9", "2.3.0"] +TESTED_PYDANTIC_VERSIONS = ["2.3.0"] TESTED_PYYAML_VERSIONS = ["5.4.1", "6.0"] diff --git a/requirements.txt b/requirements.txt index cc280885..1866b067 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -pydantic>=1.8.1,<1.11.0 +pydantic>=2.0.3,<=2.3.0 pyyaml>=5,<7 packaging>=20.0 diff --git a/src/fideslang/__init__.py b/src/fideslang/__init__.py index de20f1bf..28375cf1 100644 --- a/src/fideslang/__init__.py +++ b/src/fideslang/__init__.py @@ -20,7 +20,6 @@ DataSubject, DataUse, Evaluation, - FidesCollectionKey, FidesDatasetReference, FidesMeta, FidesModel, @@ -33,6 +32,7 @@ System, Taxonomy, ) +from .validation import FidesCollectionKey FidesModelType = Union[Type[FidesModel], Type[Evaluation]] model_map: Dict[str, FidesModelType] = { diff --git a/src/fideslang/default_taxonomy/utils.py b/src/fideslang/default_taxonomy/utils.py index dc85cabf..88dcc9fc 100644 --- a/src/fideslang/default_taxonomy/utils.py +++ b/src/fideslang/default_taxonomy/utils.py @@ -18,5 +18,5 @@ def default_factory(taxonomy_class: CustomType, **kwargs: Dict) -> CustomType: # This is the version where we started tracking from, so # we use it as the default starting point. kwargs["version_added"] = "2.0.0" # type: ignore[assignment] - item = taxonomy_class.parse_obj(kwargs) + item = taxonomy_class.model_validate(kwargs) return item diff --git a/src/fideslang/models.py b/src/fideslang/models.py index cc7fe9da..d632e785 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -10,14 +10,15 @@ from warnings import warn from pydantic import ( + field_validator, + model_validator, + ConfigDict, AnyUrl, BaseModel, - ConstrainedStr, Field, HttpUrl, + FieldValidationInfo, PositiveInt, - root_validator, - validator, ) from fideslang.validation import ( @@ -27,6 +28,7 @@ deprecated_version_later_than_added, has_versioning_if_default, is_deprecated_if_replaced, + FidesCollectionKey, matching_parent_key, no_self_reference, parse_data_type_string, @@ -36,22 +38,18 @@ ) # Reusable Validators -country_code_validator = validator("third_country_transfers", allow_reuse=True)( +country_code_validator = field_validator("third_country_transfers")( check_valid_country_code ) -matching_parent_key_validator = validator("parent_key", allow_reuse=True, always=True)( - matching_parent_key +matching_parent_key_validator = field_validator("parent_key")(matching_parent_key) +no_self_reference_validator = field_validator("parent_key")(no_self_reference) +has_versioning_if_default_validator = field_validator("is_default")( + has_versioning_if_default ) -no_self_reference_validator = validator("parent_key", allow_reuse=True)( - no_self_reference -) -has_versioning_if_default_validator = validator( - "is_default", allow_reuse=True, always=True -)(has_versioning_if_default) -deprecated_version_later_than_added_validator = validator( - "version_deprecated", allow_reuse=True +deprecated_version_later_than_added_validator = field_validator( + "version_deprecated", )(deprecated_version_later_than_added) -is_deprecated_if_replaced_validator = validator("replaced_by", allow_reuse=True)( +is_deprecated_if_replaced_validator = field_validator("replaced_by")( is_deprecated_if_replaced ) @@ -77,13 +75,13 @@ class FidesModel(BaseModel): description="Defines the Organization that this resource belongs to.", ) tags: Optional[List[str]] = None - name: Optional[str] = name_field - description: Optional[str] = description_field - - class Config: - "Config for the FidesModel" - extra = "ignore" - orm_mode = True + name: Optional[str] = Field( + default=None, description="Human-Readable name for this resource." + ) + description: Optional[str] = Field( + default=None, description="A detailed description of what this resource is." + ) + model_config = ConfigDict(extra="ignore", from_attributes=True) class DefaultModel(BaseModel): @@ -108,17 +106,12 @@ class DefaultModel(BaseModel): description="Denotes whether the resource is part of the default taxonomy or not.", ) - _has_versioning_if_default: classmethod = has_versioning_if_default_validator - _deprecated_version_later_than_added: classmethod = ( - deprecated_version_later_than_added_validator - ) - _is_deprecated_if_replaced: classmethod = is_deprecated_if_replaced_validator + _has_versioning_if_default = has_versioning_if_default_validator + _deprecated_version_later_than_added = deprecated_version_later_than_added_validator + _is_deprecated_if_replaced = is_deprecated_if_replaced_validator - @validator("version_added") - @classmethod - def validate_verion_added( - cls, version_added: Optional[str], values: Dict - ) -> Optional[str]: + @field_validator("version_added") + def validate_verion_added(cls, version_added: Optional[str]) -> Optional[str]: """ Validate that the `version_added` field is a proper FidesVersion """ @@ -128,10 +121,9 @@ def validate_verion_added( FidesVersion.validate(version_added) return version_added - @validator("version_deprecated") - @classmethod + @field_validator("version_deprecated") def validate_version_deprecated( - cls, version_deprecated: Optional[str], values: Dict + cls, version_deprecated: Optional[str] ) -> Optional[str]: """ Validate that the `version_deprecated` is a proper FidesVersion @@ -273,32 +265,28 @@ class SpecialCategoryLegalBasisEnum(str, Enum): class DataCategory(FidesModel, DefaultModel): """The DataCategory resource model.""" - parent_key: Optional[FidesKey] + parent_key: Optional[FidesKey] = None - _matching_parent_key: classmethod = matching_parent_key_validator - _no_self_reference: classmethod = no_self_reference_validator + _matching_parent_key = matching_parent_key_validator + _no_self_reference = no_self_reference_validator class DataQualifier(FidesModel, DefaultModel): """The DataQualifier resource model.""" - parent_key: Optional[FidesKey] + parent_key: Optional[FidesKey] = None - _matching_parent_key: classmethod = matching_parent_key_validator - _no_self_reference: classmethod = no_self_reference_validator + _matching_parent_key = matching_parent_key_validator + _no_self_reference = no_self_reference_validator class Cookies(BaseModel): """The Cookies resource model""" name: str - path: Optional[str] - domain: Optional[str] - - class Config: - """Config for the cookies""" - - orm_mode = True + path: Optional[str] = None + domain: Optional[str] = None + model_config = ConfigDict(from_attributes=True) class DataSubjectRights(BaseModel): @@ -314,11 +302,11 @@ class DataSubjectRights(BaseModel): description="Defines the strategy used when mapping data rights to a data subject.", ) values: Optional[List[DataSubjectRightsEnum]] = Field( + default=None, description="A list of valid data subject rights to be used when applying data rights to a data subject via a strategy.", ) - @root_validator() - @classmethod + @model_validator(mode="before") def include_exclude_has_values(cls, values: Dict) -> Dict: """ Validate the if include or exclude is chosen, that at least one @@ -335,8 +323,11 @@ def include_exclude_has_values(cls, values: Dict) -> Dict: class DataSubject(FidesModel, DefaultModel): """The DataSubject resource model.""" - rights: Optional[DataSubjectRights] = Field(description=DataSubjectRights.__doc__) + rights: Optional[DataSubjectRights] = Field( + default=None, description=DataSubjectRights.__doc__ or "" + ) automated_decisions_or_profiling: Optional[bool] = Field( + default=None, description="A boolean value to annotate whether or not automated decisions/profiling exists for the data subject.", ) @@ -346,26 +337,30 @@ class DataUse(FidesModel, DefaultModel): parent_key: Optional[FidesKey] = None legal_basis: Optional[LegalBasisEnum] = Field( + default=None, description="Deprecated. The legal basis category of which the data use falls under. This field is used as part of the creation of an exportable data map.", ) special_category: Optional[SpecialCategoriesEnum] = Field( + default=None, description="Deprecated. The special category for processing of which the data use falls under. This field is used as part of the creation of an exportable data map.", ) recipients: Optional[List[str]] = Field( + default=None, description="Deprecated. An array of recipients when sharing personal data outside of your organization.", ) legitimate_interest: Optional[bool] = Field( + default=None, description="Deprecated. A boolean representation of if the legal basis used is `Legitimate Interest`. Validated at run time and looks for a `legitimate_interest_impact_assessment` to exist if true.", ) legitimate_interest_impact_assessment: Optional[AnyUrl] = Field( + default=None, description="Deprecated. A url pointing to the legitimate interest impact assessment. Required if the legal bases used is legitimate interest.", ) - _matching_parent_key: classmethod = matching_parent_key_validator - _no_self_reference: classmethod = no_self_reference_validator + _matching_parent_key = matching_parent_key_validator + _no_self_reference = no_self_reference_validator - @root_validator - @classmethod + @model_validator(mode="before") def deprecate_fields(cls, values: Dict) -> Dict: """ Warn of Data Use fields pending deprecation. @@ -385,16 +380,14 @@ def deprecate_fields(cls, values: Dict) -> Dict: ) return values - @validator("legitimate_interest", always=True) - @classmethod + @field_validator("legitimate_interest") def set_legitimate_interest(cls, value: bool, values: Dict) -> bool: """Sets if a legitimate interest is used.""" if values["legal_basis"] == "Legitimate Interests": value = True return value - @validator("legitimate_interest_impact_assessment", always=True) - @classmethod + @field_validator("legitimate_interest_impact_assessment") def ensure_impact_assessment(cls, value: AnyUrl, values: Dict) -> AnyUrl: """ Validates an impact assessment is applied if a @@ -433,6 +426,7 @@ class MyDatasetField(DatasetFieldBase): name: str = name_field description: Optional[str] = description_field data_categories: Optional[List[FidesKey]] = Field( + default=None, description="Arrays of Data Categories, identified by `fides_key`, that applies to this field.", ) data_qualifier: FidesKey = Field( @@ -440,6 +434,7 @@ class MyDatasetField(DatasetFieldBase): description="A Data Qualifier that applies to this field. Note that this field holds a single value, therefore, the property name is singular.", ) retention: Optional[str] = Field( + default=None, description="An optional string to describe the retention policy for a dataset. This field can also be applied more granularly at either the Collection or field level of a Dataset.", ) @@ -456,7 +451,7 @@ class FidesDatasetReference(BaseModel): dataset: FidesKey field: str - direction: Optional[EdgeDirection] + direction: Optional[EdgeDirection] = None class FidesMeta(BaseModel): @@ -467,25 +462,32 @@ class FidesMeta(BaseModel): default=None, ) identity: Optional[str] = Field( - description="The type of the identity data that should be used to query this collection for a DSR." + default=None, + description="The type of the identity data that should be used to query this collection for a DSR.", ) primary_key: Optional[bool] = Field( - description="Whether the current field can be considered a primary key of the current collection" + default=None, + description="Whether the current field can be considered a primary key of the current collection", ) data_type: Optional[str] = Field( - description="Optionally specify the data type. Fides will attempt to cast values to this type when querying." + default=None, + description="Optionally specify the data type. Fides will attempt to cast values to this type when querying.", ) length: Optional[PositiveInt] = Field( - description="Optionally specify the allowable field length. Fides will not generate values that exceed this size." + default=None, + description="Optionally specify the allowable field length. Fides will not generate values that exceed this size.", ) return_all_elements: Optional[bool] = Field( - description="Optionally specify to query for the entire array if the array is an entrypoint into the node. Default is False." + default=None, + description="Optionally specify to query for the entire array if the array is an entrypoint into the node. Default is False.", ) read_only: Optional[bool] = Field( - description="Optionally specify if a field is read-only, meaning it can't be updated or deleted." + default=None, + description="Optionally specify if a field is read-only, meaning it can't be updated or deleted.", ) - @validator("data_type") + @field_validator("data_type") + @classmethod @classmethod def valid_data_type(cls, value: Optional[str]) -> Optional[str]: """Validate that all annotated data types exist in the taxonomy""" @@ -520,7 +522,8 @@ class DatasetField(DatasetFieldBase, FidesopsMetaBackwardsCompat): description="An optional array of objects that describe hierarchical/nested fields (typically found in NoSQL databases).", ) - @validator("fides_meta") + @field_validator("fides_meta") + @classmethod @classmethod def valid_meta(cls, meta_values: Optional[FidesMeta]) -> Optional[FidesMeta]: """Validate upfront that the return_all_elements flag can only be specified on array fields""" @@ -536,8 +539,7 @@ def valid_meta(cls, meta_values: Optional[FidesMeta]) -> Optional[FidesMeta]: ) return meta_values - @validator("fields") - @classmethod + @field_validator("fields") def validate_object_fields( # type: ignore cls, fields: Optional[List["DatasetField"]], @@ -569,28 +571,7 @@ def validate_object_fields( # type: ignore # this is required for the recursive reference in the pydantic model: -DatasetField.update_forward_refs() - - -class FidesCollectionKey(ConstrainedStr): - """ - Dataset.Collection name where both dataset and collection names are valid FidesKeys - """ - - @classmethod - def validate(cls, value: str) -> str: - """ - Overrides validation to check FidesCollectionKey format, and that both the dataset - and collection names have the FidesKey format. - """ - values = value.split(".") - if len(values) == 2: - FidesKey.validate(values[0]) - FidesKey.validate(values[1]) - return value - raise ValueError( - "FidesCollection must be specified in the form 'FidesKey.FidesKey'" - ) +DatasetField.model_rebuild() class CollectionMeta(BaseModel): @@ -624,12 +605,8 @@ class DatasetCollection(FidesopsMetaBackwardsCompat): fides_meta: Optional[CollectionMeta] = None - _sort_fields: classmethod = validator("fields", allow_reuse=True)( - sort_list_objects_by_name - ) - _unique_items_in_list: classmethod = validator("fields", allow_reuse=True)( - unique_items_in_list - ) + _sort_fields = field_validator("fields")(sort_list_objects_by_name) + _unique_items_in_list = field_validator("fields")(unique_items_in_list) class ContactDetails(BaseModel): @@ -668,8 +645,8 @@ class DatasetMetadata(BaseModel): Object used to hold application specific metadata for a dataset """ - resource_id: Optional[str] - after: Optional[List[FidesKey]] + resource_id: Optional[str] = None + after: Optional[List[FidesKey]] = None class Dataset(FidesModel, FidesopsMetaBackwardsCompat): @@ -686,7 +663,7 @@ class Dataset(FidesModel, FidesopsMetaBackwardsCompat): description=DatasetMetadata.__doc__, default=None ) joint_controller: Optional[ContactDetails] = Field( - description="Deprecated. " + ContactDetails.__doc__, + description="Deprecated. " + (ContactDetails.__doc__ or ""), ) retention: Optional[str] = Field( description="Deprecated. An optional string to describe the retention policy for a dataset. This field can also be applied more granularly at either the Collection or field level of a Dataset.", @@ -698,16 +675,11 @@ class Dataset(FidesModel, FidesopsMetaBackwardsCompat): description="An array of objects that describe the Dataset's collections.", ) - _sort_collections: classmethod = validator("collections", allow_reuse=True)( - sort_list_objects_by_name - ) - _check_valid_country_code: classmethod = country_code_validator - _unique_items_in_list: classmethod = validator("collections", allow_reuse=True)( - unique_items_in_list - ) + _sort_collections = field_validator("collections")(sort_list_objects_by_name) + _check_valid_country_code = country_code_validator + _unique_items_in_list = field_validator("collections")(unique_items_in_list) - @root_validator - @classmethod + @model_validator(mode="before") def deprecate_fields(cls, values: Dict) -> Dict: """ Warn of Dataset fields pending deprecation. @@ -782,11 +754,7 @@ class Evaluation(BaseModel): default="", description="A human-readable string response for the evaluation.", ) - - class Config: - "Config for the Evaluation" - extra = "ignore" - orm_mode = True + model_config = ConfigDict(extra="ignore", from_attributes=True) # Organization @@ -811,7 +779,8 @@ class OrganizationMetadata(BaseModel): """ resource_filters: Optional[List[ResourceFilter]] = Field( - description="A list of filters that can be used when generating or scanning systems." + default=None, + description="A list of filters that can be used when generating or scanning systems.", ) @@ -828,19 +797,23 @@ class Organization(FidesModel): description="An inherited field from the FidesModel that is unused with an Organization.", ) controller: Optional[ContactDetails] = Field( + default=None, description=ContactDetails.__doc__, ) data_protection_officer: Optional[ContactDetails] = Field( + default=None, description=ContactDetails.__doc__, ) fidesctl_meta: Optional[OrganizationMetadata] = Field( + default=None, description=OrganizationMetadata.__doc__, ) representative: Optional[ContactDetails] = Field( + default=None, description=ContactDetails.__doc__, ) security_policy: Optional[HttpUrl] = Field( - description="Am optional URL to the organization security policy." + default=None, description="Am optional URL to the organization security policy." ) @@ -907,9 +880,7 @@ class Policy(FidesModel): description=PolicyRule.__doc__, ) - _sort_rules: classmethod = validator("rules", allow_reuse=True)( - sort_list_objects_by_name - ) + _sort_rules = field_validator("rules")(sort_list_objects_by_name) # Registry @@ -1014,8 +985,7 @@ class PrivacyDeclaration(BaseModel): description="Cookies associated with this data use to deliver services and functionality", ) - @validator("data_qualifier") - @classmethod + @field_validator("data_qualifier") def deprecate_data_qualifier(cls, value: FidesKey) -> FidesKey: """ Warn that the `data_qualifier` field is deprecated, if set. @@ -1028,10 +998,7 @@ def deprecate_data_qualifier(cls, value: FidesKey) -> FidesKey: return value - class Config: - """Config for the Privacy Declaration""" - - orm_mode = True + model_config = ConfigDict(from_attributes=True) class SystemMetadata(BaseModel): @@ -1081,23 +1048,21 @@ class DataFlow(BaseModel): description="An array of data categories describing the data in transit.", ) - @root_validator(skip_on_failure=True) - @classmethod - def user_special_case(cls, values: Dict) -> Dict: + @model_validator(mode="after") + def user_special_case(cls, info: FieldValidationInfo) -> FieldValidationInfo: """ If either the `fides_key` or the `type` are set to "user", then the other must also be set to "user". """ - if values["fides_key"] == "user" or values["type"] == "user": + if info.data["fides_key"] == "user" or info.data["type"] == "user": assert ( - values["fides_key"] == "user" and values["type"] == "user" + info.data["fides_key"] == "user" and info.data["type"] == "user" ), "The 'user' fides_key is required for, and requires, the type 'user'" - return values + return info - @validator("type") - @classmethod + @field_validator("type") def verify_type_is_flowable(cls, value: str) -> str: """ Assert that the value of the `type` field is a member @@ -1117,31 +1082,37 @@ class System(FidesModel): """ registry_id: Optional[int] = Field( + default=None, description="The id of the system registry, if used.", ) meta: Optional[Dict] = meta_field fidesctl_meta: Optional[SystemMetadata] = Field( + default=None, description=SystemMetadata.__doc__, ) system_type: str = Field( description="A required value to describe the type of system being modeled, examples include: Service, Application, Third Party, etc.", ) data_responsibility_title: Optional[DataResponsibilityTitle] = Field( + default=None, description="Deprecated. The responsibility or role over the system that processes personal data", ) egress: Optional[List[DataFlow]] = Field( - description="The resources to which the System sends data." + default=None, description="The resources to which the System sends data." ) ingress: Optional[List[DataFlow]] = Field( - description="The resources from which the System receives data." + default=None, description="The resources from which the System receives data." ) privacy_declarations: List[PrivacyDeclaration] = Field( description=PrivacyDeclaration.__doc__, ) joint_controller: Optional[ContactDetails] = Field( - description="Deprecated. " + ContactDetails.__doc__, + default=None, + description="Deprecated. " + + (ContactDetails.__doc__ or ""), # The 'or' is to satisfy a type issue ) third_country_transfers: Optional[List[str]] = Field( + default=None, description="Deprecated. An optional array to identify any third countries where data is transited to. For consistency purposes, these fields are required to follow the Alpha-3 code set in ISO 3166-1.", ) administrating_department: Optional[str] = Field( @@ -1149,10 +1120,15 @@ class System(FidesModel): description="An optional value to identify the owning department or group of the system within your organization", ) data_protection_impact_assessment: Optional[DataProtectionImpactAssessment] = Field( - description="Deprecated. " + DataProtectionImpactAssessment.__doc__, + default=None, + description="Deprecated. " + + ( + DataProtectionImpactAssessment.__doc__ or "" + ), # The 'or' is to satisfy a type issue ) vendor_id: Optional[str] = Field( - description="The unique identifier for the vendor that's associated with this system." + default=None, + description="The unique identifier for the vendor that's associated with this system.", ) dataset_references: List[FidesKey] = Field( default_factory=list, @@ -1167,7 +1143,8 @@ class System(FidesModel): description="This toggle indicates whether the system is exempt from privacy regulation if they do process personal data.", ) reason_for_exemption: Optional[str] = Field( - description="The reason that the system is exempt from privacy regulation." + default=None, + description="The reason that the system is exempt from privacy regulation.", ) uses_profiling: bool = Field( default=False, @@ -1190,42 +1167,46 @@ class System(FidesModel): description="Whether this system requires data protection impact assessments.", ) dpa_location: Optional[str] = Field( - description="Location where the DPAs or DIPAs can be found." + default=None, description="Location where the DPAs or DIPAs can be found." ) dpa_progress: Optional[str] = Field( - description="The optional status of a Data Protection Impact Assessment" + default=None, + description="The optional status of a Data Protection Impact Assessment", ) privacy_policy: Optional[AnyUrl] = Field( - description="A URL that points to the System's publicly accessible privacy policy." + default=None, + description="A URL that points to the System's publicly accessible privacy policy.", ) legal_name: Optional[str] = Field( - description="The legal name for the business represented by the system." + default=None, + description="The legal name for the business represented by the system.", ) legal_address: Optional[str] = Field( - description="The legal address for the business represented by the system." + default=None, + description="The legal address for the business represented by the system.", ) responsibility: List[DataResponsibilityTitle] = Field( default_factory=list, description=DataResponsibilityTitle.__doc__, ) dpo: Optional[str] = Field( - description="The official privacy contact address or DPO." + default=None, description="The official privacy contact address or DPO." ) joint_controller_info: Optional[str] = Field( - description="The party or parties that share the responsibility for processing personal data." + default=None, + description="The party or parties that share the responsibility for processing personal data.", ) # Use joint_controller_info in favor of joint_controller data_security_practices: Optional[str] = Field( - description="The data security practices employed by this system." + default=None, description="The data security practices employed by this system." ) - _sort_privacy_declarations: classmethod = validator( - "privacy_declarations", allow_reuse=True - )(sort_list_objects_by_name) + _sort_privacy_declarations = field_validator("privacy_declarations")( + sort_list_objects_by_name + ) - _check_valid_country_code: classmethod = country_code_validator + _check_valid_country_code = country_code_validator - @root_validator - @classmethod + @model_validator(mode="before") def deprecate_fields(cls, values: Dict) -> Dict: """ Warn of System fields pending deprecation. @@ -1244,12 +1225,11 @@ def deprecate_fields(cls, values: Dict) -> Dict: ) return values - @validator("privacy_declarations", each_item=True) - @classmethod + @field_validator("privacy_declarations", check_fields=True) def privacy_declarations_reference_data_flows( cls, value: PrivacyDeclaration, - values: Dict, + info: FieldValidationInfo, ) -> PrivacyDeclaration: """ Any `PrivacyDeclaration`s which include `egress` and/or `ingress` fields must @@ -1259,8 +1239,8 @@ def privacy_declarations_reference_data_flows( for direction in ["egress", "ingress"]: fides_keys = getattr(value, direction, None) if fides_keys is not None: - data_flows = values[direction] - system = values["fides_key"] + data_flows = info.data[direction] + system = info.data["fides_key"] assert ( data_flows is not None and len(data_flows) > 0 ), f"PrivacyDeclaration '{value.name}' defines {direction} with one or more resources and is applied to the System '{system}', which does not itself define any {direction}." @@ -1272,10 +1252,7 @@ def privacy_declarations_reference_data_flows( return value - class Config: - """Class for the System config""" - - use_enum_values = True + model_config = ConfigDict(use_enum_values=True) # Taxonomy diff --git a/src/fideslang/relationships.py b/src/fideslang/relationships.py index b238a226..8840bbdd 100644 --- a/src/fideslang/relationships.py +++ b/src/fideslang/relationships.py @@ -75,7 +75,7 @@ def get_referenced_missing_keys(taxonomy: Taxonomy) -> Set[FidesKey]: """ referenced_keys: List[Set[FidesKey]] = [ find_referenced_fides_keys(resource) - for resource_type in taxonomy.__fields_set__ + for resource_type in taxonomy.model_fields_set for resource in getattr(taxonomy, resource_type) ] key_set: Set[FidesKey] = set( diff --git a/src/fideslang/utils.py b/src/fideslang/utils.py index 5b64dbcb..e2c490bc 100644 --- a/src/fideslang/utils.py +++ b/src/fideslang/utils.py @@ -16,7 +16,7 @@ def get_resource_by_fides_key( return { resource_type: resource - for resource_type in taxonomy.__fields_set__ + for resource_type in taxonomy.model_fields_set for resource in getattr(taxonomy, resource_type) if resource.fides_key == fides_key } or None diff --git a/src/fideslang/validation.py b/src/fideslang/validation.py index b7f4a3d6..cc519591 100644 --- a/src/fideslang/validation.py +++ b/src/fideslang/validation.py @@ -3,10 +3,10 @@ """ import re from collections import Counter -from typing import Dict, Generator, List, Optional, Pattern, Set, Tuple +from typing import Dict, Generator, List, Optional, Set, Tuple, Annotated, Pattern from packaging.version import Version -from pydantic import ConstrainedStr +from pydantic import FieldValidationInfo, AfterValidator from fideslang.default_fixtures import COUNTRY_CODES @@ -30,23 +30,40 @@ def validate(cls, value: str) -> Version: return Version(value) -class FidesKey(ConstrainedStr): - """ - A FidesKey type that creates a custom constrained string. - """ +def fides_key_regex_check(value: str) -> str: + """Throws ValueError if val is not a valid FidesKey""" regex: Pattern[str] = re.compile(r"^[a-zA-Z0-9_.<>-]+$") + if not regex.match(value): + raise FidesValidationError( + f"FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: {value}" + ) - @classmethod # This overrides the default method to throw the custom FidesValidationError - def validate(cls, value: str) -> str: - """Throws ValueError if val is not a valid FidesKey""" + return value - if not cls.regex.match(value): - raise FidesValidationError( - f"FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: {value}" - ) +fides_key_pattern = "^[a-zA-Z0-9_.<>-]+$" +FidesKey = Annotated[str, AfterValidator(fides_key_regex_check)] + + +def validate_collection_key_parts(value: str) -> str: + """ + Overrides validation to check FidesCollectionKey format, and that both the dataset + and collection names have the FidesKey format. + """ + values = value.split(".") + if len(values) == 2: + FidesKey(values[0]) + FidesKey(values[1]) return value + else: + raise ValueError( + "FidesCollection must be specified in the form 'FidesKey.FidesKey'" + ) + + +# Dataset.Collection name where both dataset and collection names are valid FidesKeys +FidesCollectionKey = Annotated[str, AfterValidator(validate_collection_key_parts)] def sort_list_objects_by_name(values: List) -> List: @@ -77,14 +94,14 @@ def unique_items_in_list(values: List) -> List: return values -def no_self_reference(value: FidesKey, values: Dict) -> FidesKey: +def no_self_reference(value: FidesKey, info: FieldValidationInfo) -> FidesKey: """ Check to make sure that the fides_key doesn't match other fides_key references within an object. i.e. DataCategory.parent_key != DataCategory.fides_key """ - fides_key = FidesKey.validate(values.get("fides_key", "")) + fides_key = FidesKey(info.data.get("fides_key", "")) if value == fides_key: raise FidesValidationError("FidesKey can not self-reference!") return value @@ -115,7 +132,7 @@ def deprecated_version_later_than_added( return version_deprecated -def has_versioning_if_default(is_default: bool, values: Dict) -> bool: +def has_versioning_if_default(is_default: bool, info: FieldValidationInfo) -> bool: """ Check to make sure that version fields are set for default items. """ @@ -123,15 +140,15 @@ def has_versioning_if_default(is_default: bool, values: Dict) -> bool: # If it's a default item, it at least needs a starting version if is_default: try: - assert values.get("version_added") + assert info.data.get("version_added") except AssertionError: raise FidesValidationError("Default items must have version information!") # If it's not default, it shouldn't have version info else: try: - assert not values.get("version_added") - assert not values.get("version_deprecated") - assert not values.get("replaced_by") + assert not info.data.get("version_added") + assert not info.data.get("version_deprecated") + assert not info.data.get("replaced_by") except AssertionError: raise FidesValidationError( "Non-default items can't have version information!" @@ -151,12 +168,12 @@ def is_deprecated_if_replaced(replaced_by: str, values: Dict) -> str: return replaced_by -def matching_parent_key(parent_key: FidesKey, values: Dict) -> FidesKey: +def matching_parent_key(parent_key: FidesKey, info: FieldValidationInfo) -> FidesKey: """ Confirm that the parent_key matches the parent parsed from the FidesKey. """ - fides_key = FidesKey.validate(values.get("fides_key", "")) + fides_key = FidesKey(info.data.get("fides_key", "")) split_fides_key = fides_key.split(".") # Check if it is a top-level resource diff --git a/tests/fideslang/test_validation.py b/tests/fideslang/test_validation.py index a932f26b..d34fae64 100644 --- a/tests/fideslang/test_validation.py +++ b/tests/fideslang/test_validation.py @@ -13,7 +13,6 @@ DatasetField, DatasetMetadata, DataUse, - FidesCollectionKey, FidesDatasetReference, FidesMeta, FidesModel, @@ -87,7 +86,7 @@ def test_deprecated_after_added(self, TaxonomyClass): @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_built_from_dict_with_empty_versions(self, TaxonomyClass) -> None: """Try building from a dictionary with explicit None values.""" - TaxonomyClass.parse_obj( + TaxonomyClass.model_validate( { "organization_fides_key": 1, "fides_key": "user", @@ -760,13 +759,11 @@ def test_data_categories_on_nested_fields(self): class TestCollectionMeta: def test_invalid_collection_key(self): with pytest.raises(ValidationError): - CollectionMeta(after=[FidesCollectionKey("test_key")]) + CollectionMeta(after=["test_key"]) def test_collection_key_has_too_many_components(self): with pytest.raises(ValidationError): - CollectionMeta( - after=[FidesCollectionKey("test_dataset.test_collection.test_field")] - ) + CollectionMeta(after=["test_dataset.test_collection.test_field"]) def test_valid_collection_key(self): - CollectionMeta(after=[FidesCollectionKey("test_dataset.test_collection")]) + CollectionMeta(after=["test_dataset.test_collection"]) From 7693bcb580ba7f7761028c9618afadbb0b7543a6 Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 10 Nov 2023 16:19:06 +0800 Subject: [PATCH 04/27] feat: remove data qualifiers --- README.md | 13 ----- demo_resources/demo_dataset.yml | 10 ---- demo_resources/demo_policy.yml | 1 - demo_resources/demo_system.yml | 2 - mkdocs/docs/index.md | 16 +------ mkdocs/docs/js/vis.js | 21 +-------- mkdocs/docs/js/vis2-absolute.js | 21 +-------- mkdocs/docs/js/vis2.js | 21 +-------- mkdocs/docs/resources/dataset.md | 6 --- mkdocs/docs/resources/policy.md | 8 +--- mkdocs/docs/resources/system.md | 6 +-- mkdocs/mkdocs.yml | 3 +- scripts/export_default_taxonomy.py | 1 - src/fideslang/__init__.py | 2 - src/fideslang/default_taxonomy/__init__.py | 2 - .../default_taxonomy/data_qualifiers.py | 45 ------------------ src/fideslang/default_taxonomy/utils.py | 4 +- src/fideslang/models.py | 47 +------------------ tests/conftest.py | 10 ---- .../failing_dataset_collection_taxonomy.yml | 4 -- tests/data/failing_dataset_field_taxonomy.yml | 4 -- tests/data/failing_dataset_taxonomy.yml | 4 -- tests/data/failing_declaration_taxonomy.yml | 2 - tests/data/failing_nested_dataset.yml | 2 - tests/data/passing_declaration_taxonomy.yml | 2 - tests/fideslang/test_default_taxonomy.py | 7 ++- tests/fideslang/test_models.py | 31 ------------ tests/fideslang/test_relationships.py | 11 ----- tests/fideslang/test_validation.py | 7 +-- 29 files changed, 18 insertions(+), 295 deletions(-) delete mode 100644 src/fideslang/default_taxonomy/data_qualifiers.py diff --git a/README.md b/README.md index d2d4729f..b8462107 100644 --- a/README.md +++ b/README.md @@ -48,19 +48,6 @@ Examples of a Data Subject are: Learn more about [Data Subject Categories in the taxonomy reference now](https://ethyca.github.io/fideslang/taxonomy/data_subjects/). -### 4. Data Identification Qualifiers - -Data Identification Qualifiers describe the degree of identification of the given data. -Think of this as a spectrum: on one end is completely anonymous data, i.e. it is impossible to identify an individual from it; on the other end is data that specifically identifies an individual. - -Along this spectrum are labels that describe the degree of identification that a given data might provide, such as: - -- `identified_data` -- `anonymized_data` -- `aggregated_data` - -Learn more about [Data Identification Qualifiers in the taxonomy reference now](https://ethyca.github.io/fideslang/taxonomy/data_qualifiers/). - ### Extensibility & Interoperability The taxonomy is designed to support common privacy compliance regulations and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. diff --git a/demo_resources/demo_dataset.yml b/demo_resources/demo_dataset.yml index 29867d45..97be8205 100644 --- a/demo_resources/demo_dataset.yml +++ b/demo_resources/demo_dataset.yml @@ -5,8 +5,6 @@ dataset: description: Data collected about users for our analytics system. meta: null data_categories: [] - data_qualifiers: - - aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: "30 days after account deletion" third_country_transfers: - GBR @@ -15,37 +13,29 @@ dataset: - name: users description: User information data_categories: [] - data_qualifiers: - - aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified fields: - name: created_at description: User's creation timestamp data_categories: - system.operations - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified - name: email description: User's Email data_categories: - user.contact.email - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: Account termination - name: first_name description: User's first name data_categories: - user.name - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified retention: Account termination - name: food_preference description: User's favorite food data_categories: [] - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified - name: state description: User's State data_categories: - user.contact.state - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified - name: uuid description: User's unique ID data_categories: - user.unique_id - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/demo_resources/demo_policy.yml b/demo_resources/demo_policy.yml index 61ff9c3b..159175bb 100644 --- a/demo_resources/demo_policy.yml +++ b/demo_resources/demo_policy.yml @@ -17,4 +17,3 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/demo_resources/demo_system.yml b/demo_resources/demo_system.yml index 72448167..570765b9 100644 --- a/demo_resources/demo_system.yml +++ b/demo_resources/demo_system.yml @@ -23,7 +23,6 @@ system: data_use: improve.system data_subjects: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified ingress: - demo_users_dataset @@ -40,4 +39,3 @@ system: data_use: advertising data_subjects: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index f0b2c567..2dbed3d0 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -59,6 +59,7 @@ Data Uses are also hierarchical with natural inheritance, meaning you can easily Learn more about [Data Uses in the taxonomy reference now](taxonomy/data_uses.md). ### 3. Data Subjects + Data Subjects is a label commonly used in the regulatory world to describe the users of a system who's data is being processed. In many systems a generic user label may be sufficient, however the taxonomy is intended to provide greater control through specificity where needed. Examples of this are: @@ -71,23 +72,10 @@ Examples of this are: Learn more about [Data Subjects in the taxonomy reference now](taxonomy/data_subjects.md). -### 4. Data Qualifiers -Data Qualifiers describe the degree of identification of the given data. Think of this as a spectrum: on one end is completely anonymous data, i.e. it is impossible to identify an individual from it, and on the other end is data that specifically identifies an individual. - -Along this spectrum are labels that describe the degree of identification that a given data might provide, such as: - -- `identified` -- `anonymized` -- `aggregated` - -Learn more about [Data Qualifiers in the taxonomy reference now](taxonomy/data_qualifiers.md). - ### Extensibility and Interoperability + The taxonomy is designed to support common privacy compliance regulations `and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. You can extend the taxonomy to support your system needs. If you do this, we recommend extending from the existing class structures to ensure interoperability inside and outside your organization. If you have suggestions for missing classifications or concepts, please submit them for addition. - - - diff --git a/mkdocs/docs/js/vis.js b/mkdocs/docs/js/vis.js index 063d4fb2..e78ca437 100644 --- a/mkdocs/docs/js/vis.js +++ b/mkdocs/docs/js/vis.js @@ -733,8 +733,7 @@ Promise.all([ d3.csv("csv/data_categories.csv"), d3.csv("csv/data_uses.csv"), d3.csv("csv/data_subjects.csv"), - d3.csv("csv/data_qualifiers.csv"), -]).then(([categoriesCSV, usesCSV, subjectsCSV, qualifiersCSV]) => { +]).then(([categoriesCSV, usesCSV, subjectsCSV]) => { const tooltip = new VisTooltip(); const colors = { @@ -820,24 +819,6 @@ Promise.all([ "#f73ffc", "#fb409e", ]), - qualifiers: d3 - .scaleOrdinal() - .domain([ - "data_qualifier", - "aggregated", - "aggregated.anonymized", - "aggregated.anonymized.unlinked_pseudonymized", - "aggregated.anonymized.unlinked_pseudonymized.pseudonymized", - "aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - ]) - .range([ - "#2a3045", - "#0861ce", - "#8459cc", - "#c14cbb", - "#ed43a0", - "#ff4a7f", - ]), }; const accessor = { diff --git a/mkdocs/docs/js/vis2-absolute.js b/mkdocs/docs/js/vis2-absolute.js index 9a001726..c1e72506 100644 --- a/mkdocs/docs/js/vis2-absolute.js +++ b/mkdocs/docs/js/vis2-absolute.js @@ -733,8 +733,7 @@ Promise.all([ d3.csv("https://ethyca.github.io/fideslang/csv/data_categories.csv"), d3.csv("https://ethyca.github.io/fideslang/csv/data_uses.csv"), d3.csv("https://ethyca.github.io/fideslang/csv/data_subjects.csv"), - d3.csv("https://ethyca.github.io/fideslang/csv/data_qualifiers.csv"), -]).then(([categoriesCSV, usesCSV, subjectsCSV, qualifiersCSV]) => { +]).then(([categoriesCSV, usesCSV, subjectsCSV]) => { const tooltip = new VisTooltip(); const colors = { @@ -820,24 +819,6 @@ Promise.all([ "#f73ffc", "#fb409e", ]), - qualifiers: d3 - .scaleOrdinal() - .domain([ - "data_qualifier", - "aggregated", - "aggregated.anonymized", - "aggregated.anonymized.unlinked_pseudonymized", - "aggregated.anonymized.unlinked_pseudonymized.pseudonymized", - "aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - ]) - .range([ - "#2a3045", - "#0861ce", - "#8459cc", - "#c14cbb", - "#ed43a0", - "#ff4a7f", - ]), }; const accessor = { diff --git a/mkdocs/docs/js/vis2.js b/mkdocs/docs/js/vis2.js index c06794c8..df501c35 100644 --- a/mkdocs/docs/js/vis2.js +++ b/mkdocs/docs/js/vis2.js @@ -733,8 +733,7 @@ Promise.all([ d3.csv("../csv/data_categories.csv"), d3.csv("../csv/data_uses.csv"), d3.csv("../csv/data_subjects.csv"), - d3.csv("../csv/data_qualifiers.csv"), -]).then(([categoriesCSV, usesCSV, subjectsCSV, qualifiersCSV]) => { +]).then(([categoriesCSV, usesCSV, subjectsCSV]) => { const tooltip = new VisTooltip(); const colors = { @@ -820,24 +819,6 @@ Promise.all([ "#f73ffc", "#fb409e", ]), - qualifiers: d3 - .scaleOrdinal() - .domain([ - "data_qualifier", - "aggregated", - "aggregated.anonymized", - "aggregated.anonymized.unlinked_pseudonymized", - "aggregated.anonymized.unlinked_pseudonymized.pseudonymized", - "aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - ]) - .range([ - "#2a3045", - "#0861ce", - "#8459cc", - "#c14cbb", - "#ed43a0", - "#ff4a7f", - ]), }; const accessor = { diff --git a/mkdocs/docs/resources/dataset.md b/mkdocs/docs/resources/dataset.md index 3c70c521..1a2a20cd 100644 --- a/mkdocs/docs/resources/dataset.md +++ b/mkdocs/docs/resources/dataset.md @@ -56,7 +56,6 @@ An optional array of contact information if a Joint Controller exists. This info An optional string to describe the retention policy for a dataset. This field can also be applied more granularly at either the Collection or field level of a Dataset **data_categories**     [_string_]
-**data_qualifiers**     [_string_]
Arrays of Data Category and Data Qualifier resources, identified by `fides_key`, that apply to all collections in the Dataset. @@ -73,7 +72,6 @@ A UI-friendly label for the collection. A human-readable description of the collection. **collections.data_categories**     [_string_]
-**collections.data_qualifiers**     [_string_]
Arrays of Data Category and Data Qualifier resources, identified by `fides_key`, that apply to all fields in the collection. @@ -97,10 +95,6 @@ A human-readable description of the field. Arrays of Data Categories, identified by `fides_key`, that applies to this field. -**collections.fields.data_qualifier**     _string_
- -A Data Qualifier that applies to this field. Note that this field holds a single value, therefore, the property name is singular. - **collections.fields.retention**  _string_ An optional string to describe the retention policy for a field within a Dataset collection. diff --git a/mkdocs/docs/resources/policy.md b/mkdocs/docs/resources/policy.md index fa700054..ce7d1051 100644 --- a/mkdocs/docs/resources/policy.md +++ b/mkdocs/docs/resources/policy.md @@ -1,6 +1,6 @@ # Policy -A Policy is your privacy policy as code, it lists a set of acceptable and non-acceptable rules and uses all 4 privacy attributes (`data_category`, `data_use`, `data_subject`, and `data_qualifier`). The purpose of the policy is to state what types of data are allowed for certain usages. +A Policy is your privacy policy as code, it lists a set of acceptable and non-acceptable rules and uses all 3 privacy attributes (`data_category`, `data_use`, `data_subject`). The purpose of the policy is to state what types of data are allowed for certain usages. ``` organization @@ -34,10 +34,6 @@ The [Data Use](../../taxonomy/data_uses/) privacy attribute describes the variou The [Data Subjects](../../taxonomy/data_subjects/) privacy attribute describes the individual persons whose data your rule pertains to. -**data_qualifier**     _string_      - -The [Data Qualifier](../../taxonomy/data_qualifiers/) privacy attribute describes the acceptable or non-acceptable level of deidentification for this data. - **matches**     _enum_      * `ANY` @@ -76,7 +72,6 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified ``` **Demo manifest file:** `/fides/demo_resources/demo_policy.yml` @@ -113,7 +108,6 @@ policy: "customer" ] }, - "data_qualifier": "aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified" } ] } diff --git a/mkdocs/docs/resources/system.md b/mkdocs/docs/resources/system.md index 09d27508..f5e457ec 100644 --- a/mkdocs/docs/resources/system.md +++ b/mkdocs/docs/resources/system.md @@ -1,6 +1,6 @@ # System -A System is a model for describing anything that processes data for your organization (applications, services, 3rd party APIs, etc.) and describes how these datasets are used for business functions of instances of your data resources. It contains all 4 privacy attributes (`data_category`, `data_use`, `data_subject`, and `data_qualifier`). +A System is a model for describing anything that processes data for your organization (applications, services, 3rd party APIs, etc.) and describes how these datasets are used for business functions of instances of your data resources. It contains all 3 privacy attributes (`data_category`, `data_use`, and `data_subject`). ``` organization @@ -64,7 +64,7 @@ The resources from which the System receives data. **privacy_declarations**     [array]      -The array of declarations describing the types of data in your system. This is a list of the privcy attributes (`data_category`, `data_use`, `data_subject`, and `data_qualifier`) for each of your systems. +The array of declarations describing the types of data in your system. This is a list of the privcy attributes (`data_category`, `data_use`, and `data_subject`) for each of your systems. If a dataset is referenced as part of the system, all applicable data categories set on the dataset are treated as part of the system. @@ -114,7 +114,6 @@ system: data_use: improve.system data_subjects: - customer - data_qualifier: identified_data egress: - another_demo_system ingress: @@ -166,7 +165,6 @@ system: "data_subjects": [ "customer" ], - "data_qualifier": "identified_data", "egress": ["another_demo_system"], "ingress": ["yet_another_demo_system"] } diff --git a/mkdocs/mkdocs.yml b/mkdocs/mkdocs.yml index cae7cf78..a76f02b7 100644 --- a/mkdocs/mkdocs.yml +++ b/mkdocs/mkdocs.yml @@ -13,7 +13,6 @@ nav: - Data Categories: taxonomy/data_categories.md - Data Uses: taxonomy/data_uses.md - Data Subjects: taxonomy/data_subjects.md - - Data Qualifiers: taxonomy/data_qualifiers.md - Resources: - Organization: resources/organization.md - Policy: resources/policy.md @@ -63,4 +62,4 @@ extra_css: - https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.7.2/styles/default.min.css - css/fides.css - css/taxonomy.css - - css/logo.css \ No newline at end of file + - css/logo.css diff --git a/scripts/export_default_taxonomy.py b/scripts/export_default_taxonomy.py index fe1b3400..7d2d96cc 100644 --- a/scripts/export_default_taxonomy.py +++ b/scripts/export_default_taxonomy.py @@ -13,7 +13,6 @@ FILE_RESOURCE_PAIRS: Tuple[Tuple[str, str], ...] = ( ("data_categories", "data_category"), ("data_subjects", "data_subject"), - ("data_qualifiers", "data_qualifier"), ("data_uses", "data_use"), ) DATA_DIR = "data_files" diff --git a/src/fideslang/__init__.py b/src/fideslang/__init__.py index 4295434d..9d18a523 100644 --- a/src/fideslang/__init__.py +++ b/src/fideslang/__init__.py @@ -27,7 +27,6 @@ from .models import ( DataCategory, DataFlow, - DataQualifier, Dataset, DatasetField, DatasetFieldBase, @@ -51,7 +50,6 @@ FidesModelType = Union[Type[FidesModel], Type[Evaluation]] model_map: Dict[str, FidesModelType] = { "data_category": DataCategory, - "data_qualifier": DataQualifier, "data_subject": DataSubject, "data_use": DataUse, "dataset": Dataset, diff --git a/src/fideslang/default_taxonomy/__init__.py b/src/fideslang/default_taxonomy/__init__.py index 47937331..b032904b 100644 --- a/src/fideslang/default_taxonomy/__init__.py +++ b/src/fideslang/default_taxonomy/__init__.py @@ -3,7 +3,6 @@ from fideslang.models import Taxonomy from .data_categories import DEFAULT_DATA_CATEGORIES -from .data_qualifiers import DEFAULT_DATA_QUALIFIERS from .data_subjects import DEFAULT_DATA_SUBJECTS from .data_uses import DEFAULT_DATA_USES from .organizations import DEFAULT_ORGANIZATIONS @@ -14,7 +13,6 @@ DEFAULT_TAXONOMY = Taxonomy( data_category=sorted(DEFAULT_DATA_CATEGORIES, key=sort_data_types), - data_qualifier=sorted(DEFAULT_DATA_QUALIFIERS, key=sort_data_types), data_subject=sorted(DEFAULT_DATA_SUBJECTS, key=sort_data_types), data_use=sorted(DEFAULT_DATA_USES, key=sort_data_types), organization=DEFAULT_ORGANIZATIONS, diff --git a/src/fideslang/default_taxonomy/data_qualifiers.py b/src/fideslang/default_taxonomy/data_qualifiers.py deleted file mode 100644 index eb46e2cc..00000000 --- a/src/fideslang/default_taxonomy/data_qualifiers.py +++ /dev/null @@ -1,45 +0,0 @@ -from functools import partial - -from fideslang.models import DataQualifier - -from .utils import default_factory - -default_qualifier_factory = partial(default_factory, taxonomy_class=DataQualifier) - - -DEFAULT_DATA_QUALIFIERS = [ - default_qualifier_factory( - fides_key="aggregated", - organization_fides_key="default_organization", - name="Aggregated Data", - description="Statistical data that does not contain individually identifying information but includes information about groups of individuals that renders individual identification impossible.", - ), - default_qualifier_factory( - fides_key="aggregated.anonymized", - organization_fides_key="default_organization", - name="Anonymized Data", - description="Data where all attributes have been sufficiently altered that the individaul cannot be reidentified by this data or in combination with other datasets.", - parent_key="aggregated", - ), - default_qualifier_factory( - fides_key="aggregated.anonymized.unlinked_pseudonymized", - organization_fides_key="default_organization", - name="Unlinked Pseudonymized Data", - description="Data for which all identifiers have been substituted with unrelated values and linkages broken such that it may not be reversed, even by the party that performed the pseudonymization.", - parent_key="aggregated.anonymized", - ), - default_qualifier_factory( - fides_key="aggregated.anonymized.unlinked_pseudonymized.pseudonymized", - organization_fides_key="default_organization", - name="Pseudonymized Data", - description="Data for which all identifiers have been substituted with unrelated values, rendering the individual unidentifiable and cannot be reasonably reversed other than by the party that performed the pseudonymization.", - parent_key="aggregated.anonymized.unlinked_pseudonymized", - ), - default_qualifier_factory( - fides_key="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - organization_fides_key="default_organization", - name="Identified Data", - description="Data that directly identifies an individual.", - parent_key="aggregated.anonymized.unlinked_pseudonymized.pseudonymized", - ), -] diff --git a/src/fideslang/default_taxonomy/utils.py b/src/fideslang/default_taxonomy/utils.py index dc85cabf..f97cc94f 100644 --- a/src/fideslang/default_taxonomy/utils.py +++ b/src/fideslang/default_taxonomy/utils.py @@ -1,8 +1,8 @@ from typing import Dict, Union -from fideslang.models import DataCategory, DataQualifier, DataSubject, DataUse +from fideslang.models import DataCategory, DataSubject, DataUse -CustomType = Union[DataCategory, DataSubject, DataQualifier, DataUse] +CustomType = Union[DataCategory, DataSubject, DataUse] def default_factory(taxonomy_class: CustomType, **kwargs: Dict) -> CustomType: diff --git a/src/fideslang/models.py b/src/fideslang/models.py index b5c177a2..2733cf61 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -286,15 +286,6 @@ class DataCategory(FidesModel, DefaultModel): _no_self_reference: classmethod = no_self_reference_validator -class DataQualifier(FidesModel, DefaultModel): - """The DataQualifier resource model.""" - - parent_key: Optional[FidesKey] - - _matching_parent_key: classmethod = matching_parent_key_validator - _no_self_reference: classmethod = no_self_reference_validator - - class Cookies(BaseModel): """The Cookies resource model""" @@ -442,10 +433,6 @@ class MyDatasetField(DatasetFieldBase): data_categories: Optional[List[FidesKey]] = Field( description="Arrays of Data Categories, identified by `fides_key`, that applies to this field.", ) - data_qualifier: FidesKey = Field( - default="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - description="A Data Qualifier that applies to this field. Note that this field holds a single value, therefore, the property name is singular.", - ) retention: Optional[str] = Field( description="An optional string to describe the retention policy for a dataset. This field can also be applied more granularly at either the Collection or field level of a Dataset.", ) @@ -619,10 +606,6 @@ class DatasetCollection(FidesopsMetaBackwardsCompat): data_categories: Optional[List[FidesKey]] = Field( description="Array of Data Category resources identified by `fides_key`, that apply to all fields in the collection.", ) - data_qualifier: FidesKey = Field( - default="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - description="Array of Data Qualifier resources identified by `fides_key`, that apply to all fields in the collection.", - ) retention: Optional[str] = Field( description="An optional string to describe the retention policy for a Dataset collection. This field can also be applied more granularly at the field level of a Dataset.", ) @@ -687,9 +670,6 @@ class Dataset(FidesModel, FidesopsMetaBackwardsCompat): data_categories: Optional[List[FidesKey]] = Field( description="Array of Data Category resources identified by `fides_key`, that apply to all collections in the Dataset.", ) - data_qualifier: Optional[FidesKey] = Field( - description="Deprecated. Array of Data Qualifier resources identified by `fides_key`, that apply to all collections in the Dataset.", - ) fides_meta: Optional[DatasetMetadata] = Field( description=DatasetMetadata.__doc__, default=None ) @@ -720,9 +700,9 @@ def deprecate_fields(cls, values: Dict) -> Dict: """ Warn of Dataset fields pending deprecation. """ + # TODO: Do we want to remove these for Fideslang 3? deprecated_fields = [ "joint_controller", - "data_qualifier", "retention", "third_country_transfers", ] @@ -748,9 +728,6 @@ class ViolationAttributes(BaseModel): data_uses: List[str] = Field( description="A list of data uses which led to an evaluation violation.", ) - data_qualifier: str = Field( - description="The data qualifier which led to an evaluation violation.", - ) class Violation(BaseModel): @@ -898,10 +875,6 @@ class PolicyRule(BaseModel): data_subjects: PrivacyRule = Field( description=PrivacyRule.__doc__, ) - data_qualifier: FidesKey = Field( - default="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", - description="The fides key of the data qualifier to be used in a privacy rule.", - ) class Policy(FidesModel): @@ -972,9 +945,6 @@ class PrivacyDeclaration(BaseModel): data_use: FidesKey = Field( description="The Data Use describing a system in a privacy declaration.", ) - data_qualifier: Optional[FidesKey] = Field( - description="Deprecated. The fides key of the data qualifier describing a system in a privacy declaration.", - ) data_subjects: List[FidesKey] = Field( default_factory=list, description="An array of data subjects describing a system in a privacy declaration.", @@ -1025,20 +995,6 @@ class PrivacyDeclaration(BaseModel): description="Cookies associated with this data use to deliver services and functionality", ) - @validator("data_qualifier") - @classmethod - def deprecate_data_qualifier(cls, value: FidesKey) -> FidesKey: - """ - Warn that the `data_qualifier` field is deprecated, if set. - """ - if value is not None: - warn( - "The data_qualifier field is deprecated, and will be removed in a future version of fideslang.", - DeprecationWarning, - ) - - return value - class Config: """Config for the Privacy Declaration""" @@ -1325,7 +1281,6 @@ class Taxonomy(BaseModel): data_category: List[DataCategory] = Field(default_factory=list) data_subject: Optional[List[DataSubject]] = Field(default_factory=list) data_use: Optional[List[DataUse]] = Field(default_factory=list) - data_qualifier: Optional[List[DataQualifier]] = Field(default_factory=list) dataset: Optional[List[Dataset]] = Field(default_factory=list) system: Optional[List[System]] = Field(default_factory=list) diff --git a/tests/conftest.py b/tests/conftest.py index e41f992f..89a099f5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,12 +22,6 @@ def resources_dict(): name="Custom Data Category", description="Custom Data Category", ), - "data_qualifier": models.DataQualifier( - organization_fides_key=1, - fides_key="custom_data_qualifier", - name="Custom Data Qualifier", - description="Custom Data Qualifier", - ), "dataset": models.Dataset( organization_fides_key=1, fides_key="test_sample_db_dataset", @@ -47,14 +41,12 @@ def resources_dict(): description="A First Name Field", path="another.path", data_categories=["user.name"], - data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", ), models.DatasetField( name="Email", description="User's Email", path="another.another.path", data_categories=["user.contact.email"], - data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified", ), ], ) @@ -93,7 +85,6 @@ def resources_dict(): data_categories=models.PrivacyRule(matches="NONE", values=[]), data_uses=models.PrivacyRule(matches="NONE", values=["provide.system"]), data_subjects=models.PrivacyRule(matches="ANY", values=[]), - data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized", ), "registry": models.Registry( organization_fides_key=1, @@ -115,7 +106,6 @@ def resources_dict(): data_categories=[], data_use="provide", data_subjects=[], - data_qualifier="aggregated_data", ) ], ), diff --git a/tests/data/failing_dataset_collection_taxonomy.yml b/tests/data/failing_dataset_collection_taxonomy.yml index 6b0ae18e..dd29d344 100644 --- a/tests/data/failing_dataset_collection_taxonomy.yml +++ b/tests/data/failing_dataset_collection_taxonomy.yml @@ -7,13 +7,11 @@ dataset: description: User's information data_categories: - user.political_opinion - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized fields: - name: First_Name description: A First Name Field data_categories: - user.name - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified system: - fides_key: customer_data_sharing_system @@ -28,7 +26,6 @@ system: data_categories: - user data_use: advertising - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer ingress: @@ -53,4 +50,3 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated diff --git a/tests/data/failing_dataset_field_taxonomy.yml b/tests/data/failing_dataset_field_taxonomy.yml index 9891efcc..502b6eec 100644 --- a/tests/data/failing_dataset_field_taxonomy.yml +++ b/tests/data/failing_dataset_field_taxonomy.yml @@ -10,12 +10,10 @@ dataset: description: A First Name Field data_categories: - user.name - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified - name: political_opinion description: User's political opinion data_categories: - user.political_opinion - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized system: - fides_key: customer_data_sharing_system name: Customer Data Sharing System @@ -29,7 +27,6 @@ system: data_categories: - user data_use: advertising - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer ingress: @@ -54,4 +51,3 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated diff --git a/tests/data/failing_dataset_taxonomy.yml b/tests/data/failing_dataset_taxonomy.yml index 61e2fb92..01cff66b 100644 --- a/tests/data/failing_dataset_taxonomy.yml +++ b/tests/data/failing_dataset_taxonomy.yml @@ -4,7 +4,6 @@ dataset: description: This is a Sample Database Dataset data_categories: - user.political_opinion - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized collections: - name: users description: User's information @@ -13,7 +12,6 @@ dataset: description: A First Name Field data_categories: - user.name - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified system: - fides_key: customer_data_sharing_system @@ -28,7 +26,6 @@ system: data_categories: - user data_use: advertising - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer ingress: @@ -53,4 +50,3 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated diff --git a/tests/data/failing_declaration_taxonomy.yml b/tests/data/failing_declaration_taxonomy.yml index 53b1a42c..ebca27b6 100644 --- a/tests/data/failing_declaration_taxonomy.yml +++ b/tests/data/failing_declaration_taxonomy.yml @@ -8,7 +8,6 @@ system: data_categories: - user.political_opinion data_use: third_party_sharing.payment_processing - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer @@ -31,4 +30,3 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/tests/data/failing_nested_dataset.yml b/tests/data/failing_nested_dataset.yml index a0e005c2..2a1ac33d 100644 --- a/tests/data/failing_nested_dataset.yml +++ b/tests/data/failing_nested_dataset.yml @@ -34,7 +34,6 @@ system: data_use: improve.system data_subjects: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified ingress: - test_failing_nested_dataset_field @@ -57,4 +56,3 @@ policy: matches: OTHER values: - anonymous_user - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/tests/data/passing_declaration_taxonomy.yml b/tests/data/passing_declaration_taxonomy.yml index d2ede112..aac30812 100644 --- a/tests/data/passing_declaration_taxonomy.yml +++ b/tests/data/passing_declaration_taxonomy.yml @@ -8,7 +8,6 @@ system: data_categories: - user.political_opinion data_use: third_party_sharing.payment_processing - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer @@ -31,4 +30,3 @@ policy: matches: ANY values: - customer - data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/tests/fideslang/test_default_taxonomy.py b/tests/fideslang/test_default_taxonomy.py index c47a749b..d04f8e9b 100644 --- a/tests/fideslang/test_default_taxonomy.py +++ b/tests/fideslang/test_default_taxonomy.py @@ -9,7 +9,6 @@ "data_category": 85, "data_use": 55, "data_subject": 15, - "data_qualifier": 5, } @@ -46,7 +45,11 @@ def test_name_uniqueness(self, data_type: str) -> None: @pytest.mark.parametrize("data_type", taxonomy_counts.keys()) def test_description_uniqueness(self, data_type: str) -> None: - keys = [x.description for x in getattr(DEFAULT_TAXONOMY, data_type) if not x.version_deprecated] + keys = [ + x.description + for x in getattr(DEFAULT_TAXONOMY, data_type) + if not x.version_deprecated + ] duplicate_keys = { key: value for key, value in Counter(keys).items() if value > 1 } diff --git a/tests/fideslang/test_models.py b/tests/fideslang/test_models.py index 98bfb292..1a5fae76 100644 --- a/tests/fideslang/test_models.py +++ b/tests/fideslang/test_models.py @@ -69,7 +69,6 @@ class TestPrivacyDeclaration: def test_privacydeclaration_valid(self) -> None: assert PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", egress=[], @@ -77,19 +76,6 @@ def test_privacydeclaration_valid(self) -> None: name="declaration-name", ) - def test_privacy_declaration_data_qualifier_deprecation(self) -> None: - with deprecated_call(match="data_qualifier"): - assert PrivacyDeclaration( - data_categories=[], - data_qualifier="aggregated_data", - data_subjects=[], - data_use="provide", - dataset_references=[], - egress=["test_system_2"], - ingress=["test_system_3"], - name="declaration-name", - ) - class TestSystem: # TODO: these tests are not effectively evaluating whether the provided constructor args @@ -121,7 +107,6 @@ def test_system_valid(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", egress=["test_system_2"], @@ -172,7 +157,6 @@ def test_system_valid_nested_meta(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", egress=["test_system_2"], @@ -209,7 +193,6 @@ def test_system_valid_no_meta(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", egress=["test_system_2"], @@ -233,7 +216,6 @@ def test_system_valid_no_egress_or_ingress(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", name="declaration-name", @@ -262,7 +244,6 @@ def test_system_no_egress(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", egress=["test_system_2"], @@ -293,7 +274,6 @@ def test_system_no_ingress(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", egress=["test_system_2"], @@ -323,7 +303,6 @@ def test_system_user_ingress_valid(self) -> None: privacy_declarations=[ PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", ingress=["user"], @@ -376,7 +355,6 @@ def test_expanded_system(self): "user.demographic", "user.privacy_preferences", ], - data_qualifier="aggregated_data", data_use="functional.storage", data_subjects=[], egress=["test_system_2"], @@ -489,20 +467,17 @@ def test_valid_dataset(self): } }, }, - data_qualifier="dataset_qualifier_1", data_categories=["dataset_data_category_1"], fides_meta={"after": ["other_dataset"]}, collections=[ DatasetCollection( name="dataset_collection_1", - data_qualifier="data_collection_data_qualifier_1", data_categories=["dataset_collection_data_category_1"], fides_meta={"after": ["third_dataset.blue_collection"]}, fields=[ DatasetField( name="dataset_field_1", data_categories=["dataset_field_data_category_1"], - data_qualifier="dataset_field_data_qualifier_1", fides_meta={ "references": [ { @@ -519,14 +494,12 @@ def test_valid_dataset(self): ), DatasetCollection( name="dataset_collection_2", - data_qualifier="data_collection_data_qualifier_2", data_categories=["dataset_collection_data_category_2"], fides_meta={"after": ["orange_dataset.dataset_collection_1"]}, fields=[ DatasetField( name="dataset_field_2", data_categories=["dataset_field_data_category_2"], - data_qualifier="dataset_field_data_qualifier_2", fides_meta={ "identity": "email", "primary_key": False, @@ -541,7 +514,6 @@ def test_valid_dataset(self): @mark.parametrize( "deprecated_field,value", [ - ("data_qualifier", "dataset_qualifier_1"), ("joint_controller", {"name": "Controller_name"}), ("retention", "90 days"), ("third_country_transfers", ["IRL"]), @@ -560,7 +532,6 @@ def test_dataset_deprecated_fields(self, deprecated_field, value) -> None: def test_dataset_collection_skip_processing(self): collection = DatasetCollection( name="dataset_collection_1", - data_qualifier="data_collection_data_qualifier_1", data_categories=["dataset_collection_data_category_1"], fields=[], ) @@ -568,7 +539,6 @@ def test_dataset_collection_skip_processing(self): collection = DatasetCollection( name="dataset_collection_1", - data_qualifier="data_collection_data_qualifier_1", data_categories=["dataset_collection_data_category_1"], fides_meta={"after": ["third_dataset.blue_collection"]}, fields=[], @@ -578,7 +548,6 @@ def test_dataset_collection_skip_processing(self): collection = DatasetCollection( name="dataset_collection_1", - data_qualifier="data_collection_data_qualifier_1", data_categories=["dataset_collection_data_category_1"], fides_meta={"skip_processing": True}, fields=[], diff --git a/tests/fideslang/test_relationships.py b/tests/fideslang/test_relationships.py index d9628442..8a10ec32 100644 --- a/tests/fideslang/test_relationships.py +++ b/tests/fideslang/test_relationships.py @@ -74,7 +74,6 @@ def test_dont_find_other_objects(self) -> None: name="privacy_declaration_1", data_categories=["privacy_declaration_data_category_1"], data_use="privacy_declaration_data_use_1", - data_qualifier="privacy_declaration_data_qualifier_1", data_subjects=[ "privacy_declaration_data_subject_1", "privacy_declaration_data_subject_2", @@ -91,7 +90,6 @@ def test_dont_find_other_objects(self) -> None: "default_organization", "privacy_declaration_data_category_1", "privacy_declaration_data_use_1", - "privacy_declaration_data_qualifier_1", "privacy_declaration_data_subject_1", "privacy_declaration_data_subject_2", "privacy_declaration_data_set_1", @@ -192,7 +190,6 @@ def test_get_referenced_missing_privacy_declaration_keys(self): name="privacy_declaration_1", data_categories=["privacy_declaration_data_category_1"], data_use="privacy_declaration_data_use_1", - data_qualifier="privacy_declaration_data_qualifier_1", data_subjects=["privacy_declaration_data_subject_1"], dataset_references=["privacy_declaration_data_set_1"], ) @@ -204,7 +201,6 @@ def test_get_referenced_missing_privacy_declaration_keys(self): "default_organization", "privacy_declaration_data_category_1", "privacy_declaration_data_use_1", - "privacy_declaration_data_qualifier_1", "privacy_declaration_data_subject_1", "privacy_declaration_data_set_1", } @@ -231,7 +227,6 @@ def test_get_referenced_missing_policy_keys(self): "values": ["policy_rule_data_subject_1"], "matches": MatchesEnum.ANY, }, - data_qualifier="policy_rule_data_qualifier_1", ) ], ) @@ -242,7 +237,6 @@ def test_get_referenced_missing_policy_keys(self): "policy_rule_data_category_1", "policy_rule_data_use_1", "policy_rule_data_subject_1", - "policy_rule_data_qualifier_1", } referenced_keys = relationships.get_referenced_missing_keys(taxonomy) assert not referenced_keys.difference(expected_referenced_key) @@ -252,18 +246,15 @@ def test_get_referenced_missing_dataset_keys(self): dataset=[ Dataset( fides_key="dataset_1", - data_qualifier="dataset_qualifier_1", data_categories=["dataset_data_category_1"], collections=[ DatasetCollection( name="dataset_collection_1", - data_qualifier="data_collection_data_qualifier_1", data_categories=["dataset_collection_data_category_1"], fields=[ DatasetField( name="dataset_field_1", data_categories=["dataset_field_data_category_1"], - data_qualifier="dataset_field_data_qualifier_1", ) ], ) @@ -275,10 +266,8 @@ def test_get_referenced_missing_dataset_keys(self): "default_organization", "dataset_qualifier_1", "dataset_data_category_1", - "data_collection_data_qualifier_1", "dataset_collection_data_category_1", "dataset_field_data_category_1", - "dataset_field_data_qualifier_1", } referenced_keys = relationships.get_referenced_missing_keys(taxonomy) assert not referenced_keys.difference(expected_referenced_key) diff --git a/tests/fideslang/test_validation.py b/tests/fideslang/test_validation.py index a932f26b..7f87d141 100644 --- a/tests/fideslang/test_validation.py +++ b/tests/fideslang/test_validation.py @@ -8,7 +8,6 @@ Dataset, DataUse, DataSubject, - DataQualifier, DatasetCollection, DatasetField, DatasetMetadata, @@ -25,7 +24,7 @@ ) from fideslang.validation import FidesKey, FidesValidationError, valid_data_type -DEFAULT_TAXONOMY_CLASSES = [DataCategory, DataUse, DataQualifier, DataSubject] +DEFAULT_TAXONOMY_CLASSES = [DataCategory, DataUse, DataSubject] @pytest.mark.unit @@ -412,7 +411,6 @@ def test_valid_policy_rule(): data_categories=PrivacyRule(matches="NONE", values=[]), data_uses=PrivacyRule(matches="NONE", values=["provide.service"]), data_subjects=PrivacyRule(matches="ANY", values=[]), - data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized", ) @@ -444,7 +442,6 @@ def test_create_valid_system(): data_categories=[], data_use="provide.service", data_subjects=[], - data_qualifier="aggregated_data", dataset_references=[], ) ], @@ -477,7 +474,6 @@ def test_invalid_country_identifier(country_code: str): data_categories=[], data_use="provide.service", data_subjects=[], - data_qualifier="aggregated_data", dataset_references=["test_system"], ) ], @@ -503,7 +499,6 @@ def test_valid_country_identifier(country_code: str): data_categories=[], data_use="provide.service", data_subjects=[], - data_qualifier="aggregated_data", dataset_references=["test_system"], ) ], From 3e486b195a96d27cda1e7ff2c59196b9035647e5 Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 10 Nov 2023 16:28:01 +0800 Subject: [PATCH 05/27] feat: more qualifier removals --- mkdocs/docs/explorer.md | 4 ---- mkdocs/docs/index.md | 1 - mkdocs/docs/js/vis.js | 2 -- mkdocs/docs/js/vis2-absolute.js | 2 -- mkdocs/docs/js/vis2.js | 2 -- tests/fideslang/test_relationships.py | 1 - 6 files changed, 12 deletions(-) diff --git a/mkdocs/docs/explorer.md b/mkdocs/docs/explorer.md index 61d29ac8..b3f7122a 100644 --- a/mkdocs/docs/explorer.md +++ b/mkdocs/docs/explorer.md @@ -9,7 +9,6 @@ The taxonomy explorer is a useful way to visualize and review the taxonomy for t -
@@ -35,6 +34,3 @@ The taxonomy explorer is a useful way to visualize and review the taxonomy for t
- - - diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 2dbed3d0..b6ae41c8 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -17,7 +17,6 @@ The Fides taxonomy, or categorization, is made up of four main classification gr -
diff --git a/mkdocs/docs/js/vis.js b/mkdocs/docs/js/vis.js index e78ca437..014d02fe 100644 --- a/mkdocs/docs/js/vis.js +++ b/mkdocs/docs/js/vis.js @@ -840,13 +840,11 @@ Promise.all([ const categoriesRoot = stratify(categoriesCSV); const usesRoot = stratify(usesCSV); const subjectsRoot = stratify(subjectsCSV); - const qualifiersRoot = stratify(qualifiersCSV); const chartData = { categories: categoriesRoot, uses: usesRoot, subjects: subjectsRoot, - qualifiers: qualifiersRoot, }; const chartDataButtons = d3 .select("#data-control") diff --git a/mkdocs/docs/js/vis2-absolute.js b/mkdocs/docs/js/vis2-absolute.js index c1e72506..71eef370 100644 --- a/mkdocs/docs/js/vis2-absolute.js +++ b/mkdocs/docs/js/vis2-absolute.js @@ -840,13 +840,11 @@ Promise.all([ const categoriesRoot = stratify(categoriesCSV); const usesRoot = stratify(usesCSV); const subjectsRoot = stratify(subjectsCSV); - const qualifiersRoot = stratify(qualifiersCSV); const chartData = { categories: categoriesRoot, uses: usesRoot, subjects: subjectsRoot, - qualifiers: qualifiersRoot, }; const chartDataButtons = d3 .select("#data-control") diff --git a/mkdocs/docs/js/vis2.js b/mkdocs/docs/js/vis2.js index df501c35..93051cea 100644 --- a/mkdocs/docs/js/vis2.js +++ b/mkdocs/docs/js/vis2.js @@ -840,13 +840,11 @@ Promise.all([ const categoriesRoot = stratify(categoriesCSV); const usesRoot = stratify(usesCSV); const subjectsRoot = stratify(subjectsCSV); - const qualifiersRoot = stratify(qualifiersCSV); const chartData = { categories: categoriesRoot, uses: usesRoot, subjects: subjectsRoot, - qualifiers: qualifiersRoot, }; const chartDataButtons = d3 .select("#data-control") diff --git a/tests/fideslang/test_relationships.py b/tests/fideslang/test_relationships.py index 8a10ec32..54ca20ac 100644 --- a/tests/fideslang/test_relationships.py +++ b/tests/fideslang/test_relationships.py @@ -264,7 +264,6 @@ def test_get_referenced_missing_dataset_keys(self): ) expected_referenced_key = { "default_organization", - "dataset_qualifier_1", "dataset_data_category_1", "dataset_collection_data_category_1", "dataset_field_data_category_1", From 20a07856abdbd5845c1f887c6bf61f9d0aedf6e6 Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 10 Nov 2023 19:23:16 +0800 Subject: [PATCH 06/27] fix: mypy and pylint --- src/fideslang/models.py | 45 ++++++++++++++++++++++++------------- src/fideslang/validation.py | 15 +++++++------ 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 32034572..edd84815 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -6,7 +6,7 @@ from __future__ import annotations from enum import Enum -from typing import Any, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union from warnings import warn from pydantic import ( @@ -111,6 +111,7 @@ class DefaultModel(BaseModel): _is_deprecated_if_replaced = is_deprecated_if_replaced_validator @field_validator("version_added") + @classmethod def validate_verion_added(cls, version_added: Optional[str]) -> Optional[str]: """ Validate that the `version_added` field is a proper FidesVersion @@ -122,6 +123,7 @@ def validate_verion_added(cls, version_added: Optional[str]) -> Optional[str]: return version_added @field_validator("version_deprecated") + @classmethod def validate_version_deprecated( cls, version_deprecated: Optional[str] ) -> Optional[str]: @@ -305,6 +307,7 @@ class DataSubjectRights(BaseModel): ) @model_validator(mode="before") + @classmethod def include_exclude_has_values(cls, values: Dict) -> Dict: """ Validate the if include or exclude is chosen, that at least one @@ -359,6 +362,7 @@ class DataUse(FidesModel, DefaultModel): _no_self_reference = no_self_reference_validator @model_validator(mode="before") + @classmethod def deprecate_fields(cls, values: Dict) -> Dict: """ Warn of Data Use fields pending deprecation. @@ -379,18 +383,26 @@ def deprecate_fields(cls, values: Dict) -> Dict: return values @field_validator("legitimate_interest") - def set_legitimate_interest(cls, value: bool, values: Dict) -> bool: + @classmethod + def set_legitimate_interest(cls, value: bool, info: FieldValidationInfo) -> bool: """Sets if a legitimate interest is used.""" + values = info.data + if values["legal_basis"] == "Legitimate Interests": value = True return value @field_validator("legitimate_interest_impact_assessment") - def ensure_impact_assessment(cls, value: AnyUrl, values: Dict) -> AnyUrl: + @classmethod + def ensure_impact_assessment( + cls, value: AnyUrl, info: FieldValidationInfo + ) -> AnyUrl: """ Validates an impact assessment is applied if a legitimate interest has been defined. """ + values = info.data + if values["legitimate_interest"]: assert ( value is not None @@ -518,7 +530,6 @@ class DatasetField(DatasetFieldBase, FidesopsMetaBackwardsCompat): @field_validator("fides_meta") @classmethod - @classmethod def valid_meta(cls, meta_values: Optional[FidesMeta]) -> Optional[FidesMeta]: """Validate upfront that the return_all_elements flag can only be specified on array fields""" if not meta_values: @@ -534,15 +545,17 @@ def valid_meta(cls, meta_values: Optional[FidesMeta]) -> Optional[FidesMeta]: return meta_values @field_validator("fields") - def validate_object_fields( # type: ignore + @classmethod + def validate_object_fields( cls, fields: Optional[List["DatasetField"]], - values: Dict[str, Any], + info: FieldValidationInfo, ) -> Optional[List["DatasetField"]]: """Two validation checks for object fields: - If there are sub-fields specified, type should be either empty or 'object' - Additionally object fields cannot have data_categories. """ + values = info.data declared_data_type = None field_name: str = values.get("name") # type: ignore @@ -571,7 +584,7 @@ def validate_object_fields( # type: ignore class CollectionMeta(BaseModel): """Collection-level specific annotations used for query traversal""" - after: Optional[List[FidesCollectionKey]] + after: Optional[List[FidesCollectionKey]] = None skip_processing: Optional[bool] = False @@ -668,6 +681,7 @@ class Dataset(FidesModel, FidesopsMetaBackwardsCompat): _unique_items_in_list = field_validator("collections")(unique_items_in_list) @model_validator(mode="before") + @classmethod def deprecate_fields(cls, values: Dict) -> Dict: """ Warn of Dataset fields pending deprecation. @@ -965,11 +979,7 @@ class PrivacyDeclaration(BaseModel): cookies: Optional[List[Cookies]] = Field( description="Cookies associated with this data use to deliver services and functionality", ) - - class Config: - """Config for the Privacy Declaration""" - - orm_mode = True + model_config = ConfigDict(from_attributes=True) class SystemMetadata(BaseModel): @@ -1020,20 +1030,21 @@ class DataFlow(BaseModel): ) @model_validator(mode="after") - def user_special_case(cls, info: FieldValidationInfo) -> FieldValidationInfo: + def user_special_case(self) -> "DataFlow": """ If either the `fides_key` or the `type` are set to "user", then the other must also be set to "user". """ - if info.data["fides_key"] == "user" or info.data["type"] == "user": + if self.fides_key == "user" or self.type == "user": assert ( - info.data["fides_key"] == "user" and info.data["type"] == "user" + self.fides_key == "user" and self.type == "user" ), "The 'user' fides_key is required for, and requires, the type 'user'" - return info + return self @field_validator("type") + @classmethod def verify_type_is_flowable(cls, value: str) -> str: """ Assert that the value of the `type` field is a member @@ -1201,6 +1212,7 @@ class System(FidesModel): _check_valid_country_code = country_code_validator @model_validator(mode="before") + @classmethod def deprecate_fields(cls, values: Dict) -> Dict: """ Warn of System fields pending deprecation. @@ -1220,6 +1232,7 @@ def deprecate_fields(cls, values: Dict) -> Dict: return values @field_validator("privacy_declarations", check_fields=True) + @classmethod def privacy_declarations_reference_data_flows( cls, value: PrivacyDeclaration, diff --git a/src/fideslang/validation.py b/src/fideslang/validation.py index cc519591..15416ebb 100644 --- a/src/fideslang/validation.py +++ b/src/fideslang/validation.py @@ -42,7 +42,7 @@ def fides_key_regex_check(value: str) -> str: return value -fides_key_pattern = "^[a-zA-Z0-9_.<>-]+$" +FIDES_KEY_PATTERN = "^[a-zA-Z0-9_.<>-]+$" FidesKey = Annotated[str, AfterValidator(fides_key_regex_check)] @@ -55,11 +55,11 @@ def validate_collection_key_parts(value: str) -> str: if len(values) == 2: FidesKey(values[0]) FidesKey(values[1]) - return value else: raise ValueError( "FidesCollection must be specified in the form 'FidesKey.FidesKey'" ) + return value # Dataset.Collection name where both dataset and collection names are valid FidesKeys @@ -108,7 +108,7 @@ def no_self_reference(value: FidesKey, info: FieldValidationInfo) -> FidesKey: def deprecated_version_later_than_added( - version_deprecated: Optional[FidesVersion], values: Dict + version_deprecated: Optional[FidesVersion], info: FieldValidationInfo ) -> Optional[FidesVersion]: """ Check to make sure that the deprecated version is later than the added version. @@ -120,12 +120,12 @@ def deprecated_version_later_than_added( if not version_deprecated: return None - if version_deprecated < values.get("version_added", Version("0")): + if version_deprecated < info.data.get("version_added", Version("0")): raise FidesValidationError( "Deprecated version number can't be earlier than version added!" ) - if version_deprecated == values.get("version_added", Version("0")): + if version_deprecated == info.data.get("version_added", Version("0")): raise FidesValidationError( "Deprecated version number can't be the same as the version added!" ) @@ -157,10 +157,11 @@ def has_versioning_if_default(is_default: bool, info: FieldValidationInfo) -> bo return is_default -def is_deprecated_if_replaced(replaced_by: str, values: Dict) -> str: +def is_deprecated_if_replaced(replaced_by: str, info: FieldValidationInfo) -> str: """ Check to make sure that the item has been deprecated if there is a replacement. """ + values = info.data if replaced_by and not values.get("version_deprecated"): raise FidesValidationError("Cannot be replaced without deprecation!") @@ -174,7 +175,7 @@ def matching_parent_key(parent_key: FidesKey, info: FieldValidationInfo) -> Fide """ fides_key = FidesKey(info.data.get("fides_key", "")) - split_fides_key = fides_key.split(".") + split_fides_key = str(fides_key).split(".") # Check if it is a top-level resource if len(split_fides_key) == 1 and not parent_key: From d55e284e97392b1dbc84e8ac2898f8d760f92372 Mon Sep 17 00:00:00 2001 From: Thomas Date: Mon, 13 Nov 2023 16:00:15 +0800 Subject: [PATCH 07/27] fix more failing tests --- src/fideslang/default_taxonomy/utils.py | 2 +- src/fideslang/gvl/__init__.py | 16 +++--- src/fideslang/models.py | 8 +-- src/fideslang/parse.py | 4 +- src/fideslang/validation.py | 61 +++++++++++---------- tests/conftest.py | 24 ++++----- tests/fideslang/test_manifests.py | 16 +++--- tests/fideslang/test_models.py | 34 ++++++------ tests/fideslang/test_parse.py | 8 +-- tests/fideslang/test_validation.py | 72 +++++++++++++------------ 10 files changed, 128 insertions(+), 117 deletions(-) diff --git a/src/fideslang/default_taxonomy/utils.py b/src/fideslang/default_taxonomy/utils.py index 39133a6e..3550170e 100644 --- a/src/fideslang/default_taxonomy/utils.py +++ b/src/fideslang/default_taxonomy/utils.py @@ -18,5 +18,5 @@ def default_factory(taxonomy_class: CustomType, **kwargs: Dict) -> CustomType: # This is the version where we started tracking from, so # we use it as the default starting point. kwargs["version_added"] = "2.0.0" # type: ignore[assignment] - item = taxonomy_class.model_validate(kwargs) + item = taxonomy_class.model_validate(obj=kwargs) return item diff --git a/src/fideslang/gvl/__init__.py b/src/fideslang/gvl/__init__.py index 2a298eab..ec1c7d0a 100644 --- a/src/fideslang/gvl/__init__.py +++ b/src/fideslang/gvl/__init__.py @@ -50,16 +50,16 @@ def _load_data() -> None: ) as mapping_file: data = load(mapping_file) for raw_purpose in data["purposes"].values(): - purpose = Purpose.parse_obj(raw_purpose) - mapped_purpose = MappedPurpose.parse_obj(raw_purpose) + purpose = Purpose.model_validate(raw_purpose) + mapped_purpose = MappedPurpose.model_validate(raw_purpose) GVL_PURPOSES[purpose.id] = purpose MAPPED_PURPOSES[mapped_purpose.id] = mapped_purpose for data_use in mapped_purpose.data_uses: MAPPED_PURPOSES_BY_DATA_USE[data_use] = mapped_purpose for raw_special_purpose in data["specialPurposes"].values(): - special_purpose = Purpose.parse_obj(raw_special_purpose) - mapped_special_purpose = MappedPurpose.parse_obj(raw_special_purpose) + special_purpose = Purpose.model_validate(raw_special_purpose) + mapped_special_purpose = MappedPurpose.model_validate(raw_special_purpose) GVL_SPECIAL_PURPOSES[special_purpose.id] = special_purpose MAPPED_SPECIAL_PURPOSES[mapped_special_purpose.id] = mapped_special_purpose for data_use in mapped_special_purpose.data_uses: @@ -71,12 +71,12 @@ def _load_data() -> None: feature_data = load(feature_mapping_file) for raw_feature in feature_data["features"].values(): - feature = Feature.parse_obj(raw_feature) + feature = Feature.model_validate(raw_feature) GVL_FEATURES[feature.id] = feature FEATURES_BY_NAME[feature.name] = feature for raw_special_feature in feature_data["specialFeatures"].values(): - special_feature = Feature.parse_obj(raw_special_feature) + special_feature = Feature.model_validate(raw_special_feature) GVL_SPECIAL_FEATURES[special_feature.id] = special_feature FEATURES_BY_NAME[special_feature.name] = special_feature @@ -86,8 +86,8 @@ def _load_data() -> None: data_category_data = load(data_category_mapping_file) for raw_data_category in data_category_data.values(): - data_category = GVLDataCategory.parse_obj(raw_data_category) - mapped_data_category = MappedDataCategory.parse_obj(raw_data_category) + data_category = GVLDataCategory.model_validate(raw_data_category) + mapped_data_category = MappedDataCategory.model_validate(raw_data_category) GVL_DATA_CATEGORIES[data_category.id] = data_category MAPPED_GVL_DATA_CATEGORIES[mapped_data_category.id] = mapped_data_category diff --git a/src/fideslang/models.py b/src/fideslang/models.py index edd84815..4b621e45 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -9,6 +9,7 @@ from typing import Dict, List, Optional, Union from warnings import warn +from packaging.version import Version from pydantic import ( field_validator, model_validator, @@ -23,7 +24,6 @@ from fideslang.validation import ( FidesKey, - FidesVersion, check_valid_country_code, deprecated_version_later_than_added, has_versioning_if_default, @@ -114,12 +114,12 @@ class DefaultModel(BaseModel): @classmethod def validate_verion_added(cls, version_added: Optional[str]) -> Optional[str]: """ - Validate that the `version_added` field is a proper FidesVersion + Validate that the `version_added` field is a proper Version """ if not version_added: return None - FidesVersion.validate(version_added) + Version(version_added) return version_added @field_validator("version_deprecated") @@ -133,7 +133,7 @@ def validate_version_deprecated( if not version_deprecated: return None - FidesVersion.validate(version_deprecated) + Version(version_deprecated) return version_deprecated diff --git a/src/fideslang/parse.py b/src/fideslang/parse.py index 94462d94..9c1a25c5 100644 --- a/src/fideslang/parse.py +++ b/src/fideslang/parse.py @@ -19,7 +19,7 @@ def parse_dict( raise SystemExit(1) try: - parsed_manifest = model_map[resource_type].parse_obj(resource) + parsed_manifest = model_map[resource_type].model_validate(resource) except Exception as err: print( "Failed to parse {} from {}:\n{}".format( @@ -34,7 +34,7 @@ def load_manifests_into_taxonomy(raw_manifests: Dict[str, List[Dict]]) -> Taxono """ Parse the raw resource manifests into resource resources. """ - taxonomy = Taxonomy.parse_obj( + taxonomy = Taxonomy.model_validate( { resource_type: [ parse_dict(resource_type, resource) for resource in resource_list diff --git a/src/fideslang/validation.py b/src/fideslang/validation.py index 15416ebb..32ed80d9 100644 --- a/src/fideslang/validation.py +++ b/src/fideslang/validation.py @@ -3,37 +3,27 @@ """ import re from collections import Counter -from typing import Dict, Generator, List, Optional, Set, Tuple, Annotated, Pattern +from typing import Dict, List, Optional, Set, Tuple, Annotated, Pattern, Any from packaging.version import Version -from pydantic import FieldValidationInfo, AfterValidator +from pydantic import FieldValidationInfo, GetCoreSchemaHandler +from pydantic_core import CoreSchema, core_schema +from pydantic.functional_validators import PlainValidator from fideslang.default_fixtures import COUNTRY_CODES VALID_COUNTRY_CODES = [country["alpha3Code"] for country in COUNTRY_CODES] +FIDES_KEY_PATTERN = r"^[a-zA-Z0-9_.<>-]+$" class FidesValidationError(ValueError): """Custom exception for when the pydantic ValidationError can't be used.""" -class FidesVersion(Version): - """Validate strings as proper semantic versions.""" - - @classmethod - def __get_validators__(cls) -> Generator: - yield cls.validate - - @classmethod - def validate(cls, value: str) -> Version: - """Validates that the provided string is a valid Semantic Version.""" - return Version(value) - - def fides_key_regex_check(value: str) -> str: """Throws ValueError if val is not a valid FidesKey""" - regex: Pattern[str] = re.compile(r"^[a-zA-Z0-9_.<>-]+$") + regex: Pattern[str] = re.compile(FIDES_KEY_PATTERN) if not regex.match(value): raise FidesValidationError( f"FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: {value}" @@ -42,8 +32,24 @@ def fides_key_regex_check(value: str) -> str: return value -FIDES_KEY_PATTERN = "^[a-zA-Z0-9_.<>-]+$" -FidesKey = Annotated[str, AfterValidator(fides_key_regex_check)] +class FidesKey(str): + """ + Regex-enforced constrained string. + + Used as a unique identifier within a specific resource type. + """ + + @classmethod + def __get_pydantic_core_schema__( + cls, source_type: Any, handler: GetCoreSchemaHandler + ) -> CoreSchema: + return core_schema.no_info_after_validator_function(cls, handler(str)) + + @classmethod + def validate(cls, value: str) -> str: + """Validates that the provided string is a valid Semantic Version.""" + fides_key_regex_check(value) + return value def validate_collection_key_parts(value: str) -> str: @@ -53,8 +59,8 @@ def validate_collection_key_parts(value: str) -> str: """ values = value.split(".") if len(values) == 2: - FidesKey(values[0]) - FidesKey(values[1]) + FidesKey.validate(values[0]) + FidesKey.validate(values[1]) else: raise ValueError( "FidesCollection must be specified in the form 'FidesKey.FidesKey'" @@ -63,7 +69,7 @@ def validate_collection_key_parts(value: str) -> str: # Dataset.Collection name where both dataset and collection names are valid FidesKeys -FidesCollectionKey = Annotated[str, AfterValidator(validate_collection_key_parts)] +FidesCollectionKey = Annotated[str, PlainValidator(validate_collection_key_parts)] def sort_list_objects_by_name(values: List) -> List: @@ -108,8 +114,8 @@ def no_self_reference(value: FidesKey, info: FieldValidationInfo) -> FidesKey: def deprecated_version_later_than_added( - version_deprecated: Optional[FidesVersion], info: FieldValidationInfo -) -> Optional[FidesVersion]: + version_deprecated: Optional[Version], info: FieldValidationInfo +) -> Optional[Version]: """ Check to make sure that the deprecated version is later than the added version. @@ -136,19 +142,20 @@ def has_versioning_if_default(is_default: bool, info: FieldValidationInfo) -> bo """ Check to make sure that version fields are set for default items. """ + values = info.data # If it's a default item, it at least needs a starting version if is_default: try: - assert info.data.get("version_added") + assert values.get("version_added") except AssertionError: raise FidesValidationError("Default items must have version information!") # If it's not default, it shouldn't have version info else: try: - assert not info.data.get("version_added") - assert not info.data.get("version_deprecated") - assert not info.data.get("replaced_by") + assert not values.get("version_added") + assert not values.get("version_deprecated") + assert not values.get("replaced_by") except AssertionError: raise FidesValidationError( "Non-default items can't have version information!" diff --git a/tests/conftest.py b/tests/conftest.py index 89a099f5..ad8fbc20 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,14 +16,14 @@ def resources_dict(): """ resources_dict: Dict[str, Any] = { "data_category": models.DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user.custom", parent_key="user", name="Custom Data Category", description="Custom Data Category", ), "dataset": models.Dataset( - organization_fides_key=1, + organization_fides_key="1", fides_key="test_sample_db_dataset", name="Sample DB Dataset", description="This is a Sample Database Dataset", @@ -53,13 +53,13 @@ def resources_dict(): ], ), "data_subject": models.DataSubject( - organization_fides_key=1, + organization_fides_key="1", fides_key="custom_subject", name="Custom Data Subject", description="Custom Data Subject", ), "data_use": models.DataUse( - organization_fides_key=1, + organization_fides_key="1", fides_key="custom_data_use", name="Custom Data Use", description="Custom Data Use", @@ -73,7 +73,7 @@ def resources_dict(): description="Test Organization", ), "policy": models.Policy( - organization_fides_key=1, + organization_fides_key="1", fides_key="test_policy", name="Test Policy", version="1.3", @@ -87,15 +87,15 @@ def resources_dict(): data_subjects=models.PrivacyRule(matches="ANY", values=[]), ), "registry": models.Registry( - organization_fides_key=1, + organization_fides_key="1", fides_key="test_registry", name="Test Registry", description="Test Regsitry", systems=[], ), "system": models.System( - organization_fides_key=1, - registryId=1, + organization_fides_key="1", + registryId="1", fides_key="test_system", system_type="SYSTEM", name="Test System", @@ -120,7 +120,7 @@ def test_manifests(): "dataset": [ { "name": "Test Dataset 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "description": "Test Dataset 1", @@ -131,7 +131,7 @@ def test_manifests(): "system": [ { "name": "Test System 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 1", "fides_key": "some_system", @@ -143,7 +143,7 @@ def test_manifests(): { "name": "Test Dataset 2", "description": "Test Dataset 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "fides_key": "another_dataset", @@ -153,7 +153,7 @@ def test_manifests(): "system": [ { "name": "Test System 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 2", "fides_key": "another_system", diff --git a/tests/fideslang/test_manifests.py b/tests/fideslang/test_manifests.py index 5310624c..9fab7e04 100644 --- a/tests/fideslang/test_manifests.py +++ b/tests/fideslang/test_manifests.py @@ -68,7 +68,7 @@ def test_union_manifests(test_manifests): "name": "Test Dataset 1", "description": "Test Dataset 1", "fides_key": "some_dataset", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "datasetTables": [], @@ -77,7 +77,7 @@ def test_union_manifests(test_manifests): "name": "Test Dataset 2", "description": "Test Dataset 2", "fides_key": "another_dataset", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "datasetTables": [], @@ -86,14 +86,14 @@ def test_union_manifests(test_manifests): "system": [ { "name": "Test System 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 1", "fides_key": "some_system", }, { "name": "Test System 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 2", "fides_key": "another_system", @@ -122,7 +122,7 @@ def test_ingest_manifests(ingestion_manifest_directory): assert sorted(actual_result["dataset"], key=lambda x: x["name"]) == [ { "name": "Test Dataset 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "description": "Test Dataset 1", @@ -132,7 +132,7 @@ def test_ingest_manifests(ingestion_manifest_directory): { "name": "Test Dataset 2", "description": "Test Dataset 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "fides_key": "another_dataset", @@ -142,14 +142,14 @@ def test_ingest_manifests(ingestion_manifest_directory): assert sorted(actual_result["system"], key=lambda x: x["name"]) == [ { "name": "Test System 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 1", "fides_key": "some_system", }, { "name": "Test System 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 2", "fides_key": "another_system", diff --git a/tests/fideslang/test_models.py b/tests/fideslang/test_models.py index 1a5fae76..aa81d86d 100644 --- a/tests/fideslang/test_models.py +++ b/tests/fideslang/test_models.py @@ -102,7 +102,7 @@ def test_system_valid(self) -> None: ], meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", cookies=[{"name": "test_cookie"}], privacy_declarations=[ PrivacyDeclaration( @@ -117,7 +117,7 @@ def test_system_valid(self) -> None: ], ) ], - registry_id=1, + registry_id="1", system_type="SYSTEM", tags=["some", "tags"], ) @@ -153,7 +153,7 @@ def test_system_valid_nested_meta(self) -> None: }, }, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -164,7 +164,7 @@ def test_system_valid_nested_meta(self) -> None: name="declaration-name", ) ], - registry_id=1, + registry_id="1", system_type="SYSTEM", tags=["some", "tags"], ) @@ -189,7 +189,7 @@ def test_system_valid_no_meta(self) -> None: ], # purposefully omitting the `meta` property to ensure it's effectively optional name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -200,7 +200,7 @@ def test_system_valid_no_meta(self) -> None: name="declaration-name", ) ], - registry_id=1, + registry_id="1", system_type="SYSTEM", tags=["some", "tags"], ) @@ -212,7 +212,7 @@ def test_system_valid_no_egress_or_ingress(self) -> None: fides_key="test_system", meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -221,7 +221,7 @@ def test_system_valid_no_egress_or_ingress(self) -> None: name="declaration-name", ) ], - registry_id=1, + registry_id="1", system_type="SYSTEM", tags=["some", "tags"], ) @@ -240,7 +240,7 @@ def test_system_no_egress(self) -> None: ], meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -251,7 +251,7 @@ def test_system_no_egress(self) -> None: name="declaration-name", ) ], - registry_id=1, + registry_id="1", system_type="SYSTEM", tags=["some", "tags"], ) @@ -270,7 +270,7 @@ def test_system_no_ingress(self) -> None: fides_key="test_system", meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -281,7 +281,7 @@ def test_system_no_ingress(self) -> None: name="declaration-name", ) ], - registry_id=1, + registry_id="1", system_type="SYSTEM", tags=["some", "tags"], ) @@ -299,7 +299,7 @@ def test_system_user_ingress_valid(self) -> None: ], meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -309,7 +309,7 @@ def test_system_user_ingress_valid(self) -> None: name="declaration-name", ) ], - registry_id=1, + registry_id="1", system_type="SYSTEM", tags=["some", "tags"], ) @@ -317,11 +317,11 @@ def test_system_user_ingress_valid(self) -> None: def test_expanded_system(self): assert System( fides_key="test_system", - organization_fides_key=1, + organization_fides_key="1", tags=["some", "tags"], name="Exponential Interactive, Inc d/b/a VDX.tv", description="My system test", - registry_id=1, + registry_id="1", meta={"some": "meta stuff"}, system_type="SYSTEM", egress=[ @@ -443,7 +443,7 @@ def test_system_deprecated_fields(self, deprecated_field, value) -> None: "description": "Test System", "fides_key": "test_system", "name": "Test System", - "registry": 1, + "registry": "1", "system_type": "SYSTEM", "privacy_declarations": [], deprecated_field: value, diff --git a/tests/fideslang/test_parse.py b/tests/fideslang/test_parse.py index b94e752b..d8e75171 100644 --- a/tests/fideslang/test_parse.py +++ b/tests/fideslang/test_parse.py @@ -7,13 +7,13 @@ @pytest.mark.unit def test_parse_manifest(): expected_result = models.DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="some_resource", name="Test resource 1", description="Test Description", ) test_dict = { - "organization_fides_key": 1, + "organization_fides_key": "1", "fides_key": "some_resource", "name": "Test resource 1", "description": "Test Description", @@ -26,7 +26,7 @@ def test_parse_manifest(): def test_parse_manifest_no_fides_key_validation_error(): with pytest.raises(SystemExit): test_dict = { - "organization_fides_key": 1, + "organization_fides_key": "1", "name": "Test resource 1", "description": "Test Description", } @@ -38,7 +38,7 @@ def test_parse_manifest_no_fides_key_validation_error(): def test_parse_manifest_resource_type_error(): with pytest.raises(SystemExit): test_dict = { - "organization_fides_key": 1, + "organization_fides_key": "1", "fides_key": "some_resource", "name": "Test resource 1", "description": "Test Description", diff --git a/tests/fideslang/test_validation.py b/tests/fideslang/test_validation.py index 15d4ec1b..4062da7b 100644 --- a/tests/fideslang/test_validation.py +++ b/tests/fideslang/test_validation.py @@ -21,7 +21,11 @@ PrivacyRule, System, ) -from fideslang.validation import FidesKey, FidesValidationError, valid_data_type +from fideslang.validation import ( + FidesKey, + FidesValidationError, + valid_data_type, +) DEFAULT_TAXONOMY_CLASSES = [DataCategory, DataUse, DataSubject] @@ -35,7 +39,7 @@ def test_default_no_versions_error(self, TaxonomyClass): """There should be version info for default items.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -47,7 +51,7 @@ def test_not_default_no_versions_error(self, TaxonomyClass): """There shouldn't be version info on a non-default item.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -59,7 +63,7 @@ def test_deprecated_when_added(self, TaxonomyClass): """Item can't be deprecated in a version earlier than it was added.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -73,7 +77,7 @@ def test_deprecated_after_added(self, TaxonomyClass): """Item can't be deprecated in a version earlier than it was added.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -87,7 +91,7 @@ def test_built_from_dict_with_empty_versions(self, TaxonomyClass) -> None: """Try building from a dictionary with explicit None values.""" TaxonomyClass.model_validate( { - "organization_fides_key": 1, + "organization_fides_key": "1", "fides_key": "user", "name": "Custom Test Data", "description": "Custom Test Data Category", @@ -102,7 +106,7 @@ def test_built_from_dict_with_empty_versions(self, TaxonomyClass) -> None: def test_built_with_empty_versions(self, TaxonomyClass) -> None: """Try building directly with explicit None values.""" TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -117,7 +121,7 @@ def test_deprecated_not_added(self, TaxonomyClass): """Can't be deprecated without being added in an earlier version.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -130,7 +134,7 @@ def test_replaced_not_deprecated(self, TaxonomyClass): """If the field is replaced, it must also be deprecated.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -143,7 +147,7 @@ def test_replaced_not_deprecated(self, TaxonomyClass): def test_replaced_and_deprecated(self, TaxonomyClass): """If the field is replaced, it must also be deprecated.""" assert TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -158,7 +162,7 @@ def test_version_error(self, TaxonomyClass): """Check that versions are validated.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -170,7 +174,7 @@ def test_version_error(self, TaxonomyClass): def test_versions_valid(self, TaxonomyClass): """Check that versions are validated.""" assert TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -188,7 +192,7 @@ def test_collections_duplicate_fields_error(): data_categories=[], fields=[ DatasetField( - name=1, + name="1", description="Fides Generated Description for Column: 1", data_categories=[], ), @@ -198,7 +202,7 @@ def test_collections_duplicate_fields_error(): data_categories=[], ), DatasetField( - name=1, + name="1", description="Fides Generated Description for Column: 1", data_categories=[], ), @@ -221,7 +225,7 @@ def test_dataset_duplicate_collections_error(): data_categories=[], fields=[ DatasetField( - name=1, + name="1", description="Fides Generated Description for Column: 1", data_categories=[], ), @@ -246,7 +250,7 @@ def test_dataset_duplicate_collections_error(): @pytest.mark.unit def test_top_level_resource(): DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -258,7 +262,7 @@ def test_top_level_resource(): def test_fides_key_doesnt_match_stated_parent_key(): with pytest.raises(ValidationError): DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user.custom_test_data", name="Custom Test Data", description="Custom Test Data Category", @@ -270,7 +274,7 @@ def test_fides_key_doesnt_match_stated_parent_key(): @pytest.mark.unit def test_fides_key_matches_stated_parent_key(): DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user.account.custom_test_data", name="Custom Test Data", description="Custom Test Data Category", @@ -283,7 +287,7 @@ def test_fides_key_matches_stated_parent_key(): def test_no_parent_key_but_fides_key_contains_parent_key(): with pytest.raises(ValidationError): DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user.custom_test_data", name="Custom Test Data", description="Custom Test Data Category", @@ -294,7 +298,7 @@ def test_no_parent_key_but_fides_key_contains_parent_key(): @pytest.mark.unit def test_fides_key_with_carets(): DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="", name="Example valid key with brackets", description="This key contains a <> which is valid", @@ -306,7 +310,7 @@ def test_fides_key_with_carets(): def test_invalid_chars_in_fides_key(): with pytest.raises(ValidationError): DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="!", name="Example invalid key", description="This key contains a ! so it is invalid", @@ -317,7 +321,7 @@ def test_invalid_chars_in_fides_key(): @pytest.mark.unit def test_create_valid_data_category(): DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user.custom_test_data", name="Custom Test Data", description="Custom Test Data Category", @@ -330,7 +334,7 @@ def test_create_valid_data_category(): def test_circular_dependency_data_category(): with pytest.raises(ValidationError): DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="User Data", description="Test Data Category", @@ -342,7 +346,7 @@ def test_circular_dependency_data_category(): @pytest.mark.unit def test_create_valid_data_use(): DataUse( - organization_fides_key=1, + organization_fides_key="1", fides_key="provide.service", name="Provide the Product or Service", parent_key="provide", @@ -355,7 +359,7 @@ def test_create_valid_data_use(): def test_circular_dependency_data_use(): with pytest.raises(ValidationError): DataUse( - organization_fides_key=1, + organization_fides_key="1", fides_key="provide.service", name="Provide the Product or Service", description="Test Data Use", @@ -402,8 +406,8 @@ def test_invalid_matches_privacy_rule(): @pytest.mark.unit def test_valid_policy_rule(): assert PolicyRule( - organization_fides_key=1, - policyId=1, + organization_fides_key="1", + policyId="1", fides_key="test_policy", name="Test Policy", description="Test Policy", @@ -416,7 +420,7 @@ def test_valid_policy_rule(): @pytest.mark.unit def test_valid_policy(): Policy( - organization_fides_key=1, + organization_fides_key="1", fides_key="test_policy", name="Test Policy", version="1.3", @@ -429,8 +433,8 @@ def test_valid_policy(): @pytest.mark.unit def test_create_valid_system(): System( - organization_fides_key=1, - registryId=1, + organization_fides_key="1", + registryId="1", fides_key="test_system", system_type="SYSTEM", name="Test System", @@ -460,8 +464,8 @@ def test_invalid_country_identifier(country_code: str): """Validate some invalid country identifiers raise an error""" with pytest.raises(ValidationError): System( - organization_fides_key=1, - registryId=1, + organization_fides_key="1", + registryId="1", fides_key="test_system", system_type="SYSTEM", name="Test System", @@ -485,8 +489,8 @@ def test_invalid_country_identifier(country_code: str): def test_valid_country_identifier(country_code: str): """Validates usage of alpha-3 codes per ISO 3166""" System( - organization_fides_key=1, - registryId=1, + organization_fides_key="1", + registryId="1", fides_key="test_system", system_type="SYSTEM", name="Test System", From 5f2c86c1da6f038862404115cdc3e4e9188c06f6 Mon Sep 17 00:00:00 2001 From: Thomas Date: Mon, 13 Nov 2023 17:55:45 +0800 Subject: [PATCH 08/27] fix version and fides_key validation test failures --- .github/workflows/pr_checks.yml | 2 +- src/fideslang/models.py | 71 ++++++++++++++++----------------- src/fideslang/validation.py | 68 +++++++++---------------------- 3 files changed, 55 insertions(+), 86 deletions(-) diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index 4ede4bfd..17a9988d 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -94,7 +94,7 @@ jobs: strategy: matrix: python_version: ["3.8", "3.9", "3.10", "3.11"] - pydantic_version: ["1.8.2", "1.9.2", "1.10.9", "2.3.0"] + pydantic_version: ["2.3.0"] pyyaml_version: ["5.4.1", "6.0"] runs-on: ubuntu-latest continue-on-error: true diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 4b621e45..ee6443be 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -9,12 +9,13 @@ from typing import Dict, List, Optional, Union from warnings import warn -from packaging.version import Version +from packaging.version import Version, InvalidVersion from pydantic import ( field_validator, model_validator, ConfigDict, AnyUrl, + ValidationError, BaseModel, Field, HttpUrl, @@ -27,7 +28,7 @@ check_valid_country_code, deprecated_version_later_than_added, has_versioning_if_default, - is_deprecated_if_replaced, + FidesValidationError, FidesCollectionKey, matching_parent_key, no_self_reference, @@ -43,15 +44,6 @@ ) matching_parent_key_validator = field_validator("parent_key")(matching_parent_key) no_self_reference_validator = field_validator("parent_key")(no_self_reference) -has_versioning_if_default_validator = field_validator("is_default")( - has_versioning_if_default -) -deprecated_version_later_than_added_validator = field_validator( - "version_deprecated", -)(deprecated_version_later_than_added) -is_deprecated_if_replaced_validator = field_validator("replaced_by")( - is_deprecated_if_replaced -) # Reusable Fields name_field = Field(description="Human-Readable name for this resource.") @@ -106,35 +98,42 @@ class DefaultModel(BaseModel): description="Denotes whether the resource is part of the default taxonomy or not.", ) - _has_versioning_if_default = has_versioning_if_default_validator - _deprecated_version_later_than_added = deprecated_version_later_than_added_validator - _is_deprecated_if_replaced = is_deprecated_if_replaced_validator + @model_validator(mode="after") + def verify_version_info(self) -> "DefaultModel": + """Compose all of the version checks into a single validator.""" + version_added = self.version_added + version_deprecated = self.version_deprecated + replaced_by = self.replaced_by + is_default = self.is_default + + if version_added: + try: + Version(version_added) + except InvalidVersion: + raise FidesValidationError( + f"Field 'version_added' does not have a valid version: {version_added}" + ) - @field_validator("version_added") - @classmethod - def validate_verion_added(cls, version_added: Optional[str]) -> Optional[str]: - """ - Validate that the `version_added` field is a proper Version - """ - if not version_added: - return None + if version_deprecated: + try: + Version(version_deprecated) + except InvalidVersion: + raise FidesValidationError( + f"Field 'version_deprecated' does not have a valid version: {version_deprecated}" + ) - Version(version_added) - return version_added + deprecated_version_later_than_added( + Version(version_deprecated), version_added + ) - @field_validator("version_deprecated") - @classmethod - def validate_version_deprecated( - cls, version_deprecated: Optional[str] - ) -> Optional[str]: - """ - Validate that the `version_deprecated` is a proper FidesVersion - """ - if not version_deprecated: - return None + has_versioning_if_default( + is_default, version_added, version_deprecated, replaced_by + ) + + if replaced_by and not version_deprecated: + raise FidesValidationError("Cannot be replaced without deprecation!") - Version(version_deprecated) - return version_deprecated + return self class DataResponsibilityTitle(str, Enum): diff --git a/src/fideslang/validation.py b/src/fideslang/validation.py index 32ed80d9..5d6e9050 100644 --- a/src/fideslang/validation.py +++ b/src/fideslang/validation.py @@ -20,7 +20,7 @@ class FidesValidationError(ValueError): """Custom exception for when the pydantic ValidationError can't be used.""" -def fides_key_regex_check(value: str) -> str: +def validate_fides_key(value: str) -> str: """Throws ValueError if val is not a valid FidesKey""" regex: Pattern[str] = re.compile(FIDES_KEY_PATTERN) @@ -32,24 +32,7 @@ def fides_key_regex_check(value: str) -> str: return value -class FidesKey(str): - """ - Regex-enforced constrained string. - - Used as a unique identifier within a specific resource type. - """ - - @classmethod - def __get_pydantic_core_schema__( - cls, source_type: Any, handler: GetCoreSchemaHandler - ) -> CoreSchema: - return core_schema.no_info_after_validator_function(cls, handler(str)) - - @classmethod - def validate(cls, value: str) -> str: - """Validates that the provided string is a valid Semantic Version.""" - fides_key_regex_check(value) - return value +FidesKey = Annotated[str, PlainValidator(validate_fides_key)] def validate_collection_key_parts(value: str) -> str: @@ -59,8 +42,8 @@ def validate_collection_key_parts(value: str) -> str: """ values = value.split(".") if len(values) == 2: - FidesKey.validate(values[0]) - FidesKey.validate(values[1]) + validate_fides_key(values[0]) + validate_fides_key(values[1]) else: raise ValueError( "FidesCollection must be specified in the form 'FidesKey.FidesKey'" @@ -114,67 +97,54 @@ def no_self_reference(value: FidesKey, info: FieldValidationInfo) -> FidesKey: def deprecated_version_later_than_added( - version_deprecated: Optional[Version], info: FieldValidationInfo -) -> Optional[Version]: + version_deprecated: Version, version_added: Optional[str] +): """ Check to make sure that the deprecated version is later than the added version. This will also catch errors where the deprecated version is defined but the added version is empty. """ + parsed_version_added = Version(version_added) if version_added else Version("0") - if not version_deprecated: - return None - - if version_deprecated < info.data.get("version_added", Version("0")): + if version_deprecated < parsed_version_added: raise FidesValidationError( "Deprecated version number can't be earlier than version added!" ) - if version_deprecated == info.data.get("version_added", Version("0")): + if version_deprecated == parsed_version_added: raise FidesValidationError( "Deprecated version number can't be the same as the version added!" ) - return version_deprecated -def has_versioning_if_default(is_default: bool, info: FieldValidationInfo) -> bool: +def has_versioning_if_default( + is_default: bool, + version_added: Optional[str], + version_deprecated: Optional[str], + replaced_by: Optional[str], +): """ Check to make sure that version fields are set for default items. """ - values = info.data # If it's a default item, it at least needs a starting version if is_default: try: - assert values.get("version_added") + assert version_added except AssertionError: raise FidesValidationError("Default items must have version information!") # If it's not default, it shouldn't have version info else: try: - assert not values.get("version_added") - assert not values.get("version_deprecated") - assert not values.get("replaced_by") + assert not version_added + assert not version_deprecated + assert not replaced_by except AssertionError: raise FidesValidationError( "Non-default items can't have version information!" ) - return is_default - - -def is_deprecated_if_replaced(replaced_by: str, info: FieldValidationInfo) -> str: - """ - Check to make sure that the item has been deprecated if there is a replacement. - """ - values = info.data - - if replaced_by and not values.get("version_deprecated"): - raise FidesValidationError("Cannot be replaced without deprecation!") - - return replaced_by - def matching_parent_key(parent_key: FidesKey, info: FieldValidationInfo) -> FidesKey: """ From a718e17e393a2c2ab5aff440166c363a158767b5 Mon Sep 17 00:00:00 2001 From: Thomas Date: Mon, 13 Nov 2023 18:00:51 +0800 Subject: [PATCH 09/27] fix missing defaults --- src/fideslang/models.py | 3 ++- tests/fideslang/test_relationships.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index ee6443be..7be62472 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -48,7 +48,7 @@ # Reusable Fields name_field = Field(description="Human-Readable name for this resource.") description_field = Field( - description="A detailed description of what this resource is." + default=None, description="A detailed description of what this resource is." ) meta_field = Field( default=None, @@ -524,6 +524,7 @@ class DatasetField(DatasetFieldBase, FidesopsMetaBackwardsCompat): fides_meta: Optional[FidesMeta] = None fields: Optional[List[DatasetField]] = Field( + default=None, description="An optional array of objects that describe hierarchical/nested fields (typically found in NoSQL databases).", ) diff --git a/tests/fideslang/test_relationships.py b/tests/fideslang/test_relationships.py index 54ca20ac..ca546382 100644 --- a/tests/fideslang/test_relationships.py +++ b/tests/fideslang/test_relationships.py @@ -108,7 +108,7 @@ def test_find_referenced_fides_keys_1(self) -> None: assert referenced_keys == set(expected_referenced_key) def test_find_referenced_fides_keys_2(self) -> None: - test_system = System.construct( + test_system = System.model_construct( name="test_dc", fides_key="test_dc", description="test description", @@ -158,7 +158,7 @@ def test_get_referenced_missing_keys(self): ), ], system=[ - System.construct( + System.model_construct( name="test_system", fides_key="test_system", description="test description", From c329f44decc1cdb5761d531101f0089e215e5fef Mon Sep 17 00:00:00 2001 From: Thomas Date: Mon, 13 Nov 2023 18:13:47 +0800 Subject: [PATCH 10/27] clean up more test failures --- src/fideslang/models.py | 49 +++++++++++++++++++----------- tests/fideslang/test_models.py | 4 --- tests/fideslang/test_validation.py | 21 +++++++------ 3 files changed, 43 insertions(+), 31 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 7be62472..bdfed112 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -565,12 +565,12 @@ def validate_object_fields( if fields and declared_data_type: data_type, _ = parse_data_type_string(declared_data_type) if data_type != "object": - raise ValueError( + raise FidesValidationError( f"The data type '{data_type}' on field '{field_name}' is not compatible with specified sub-fields. Convert to an 'object' field." ) if (fields or declared_data_type == "object") and values.get("data_categories"): - raise ValueError( + raise FidesValidationError( f"Object field '{field_name}' cannot have specified data_categories. Specify category on sub-field instead" ) @@ -922,47 +922,57 @@ class PrivacyDeclaration(BaseModel): """ name: Optional[str] = Field( + default=None, description="The name of the privacy declaration on the system.", ) data_categories: List[FidesKey] = Field( + default=[], description="An array of data categories describing a system in a privacy declaration.", ) data_use: FidesKey = Field( description="The Data Use describing a system in a privacy declaration.", ) data_subjects: List[FidesKey] = Field( - default_factory=list, + default=[], description="An array of data subjects describing a system in a privacy declaration.", ) dataset_references: Optional[List[FidesKey]] = Field( + default=None, description="Referenced Dataset fides keys used by the system.", ) egress: Optional[List[FidesKey]] = Field( - description="The resources to which data is sent. Any `fides_key`s included in this list reference `DataFlow` entries in the `egress` array of any `System` resources to which this `PrivacyDeclaration` is applied." + default=None, + description="The resources to which data is sent. Any `fides_key`s included in this list reference `DataFlow` entries in the `egress` array of any `System` resources to which this `PrivacyDeclaration` is applied.", ) ingress: Optional[List[FidesKey]] = Field( - description="The resources from which data is received. Any `fides_key`s included in this list reference `DataFlow` entries in the `ingress` array of any `System` resources to which this `PrivacyDeclaration` is applied." + default=None, + description="The resources from which data is received. Any `fides_key`s included in this list reference `DataFlow` entries in the `ingress` array of any `System` resources to which this `PrivacyDeclaration` is applied.", ) features: List[str] = Field( - default_factory=list, description="The features of processing personal data." + default=[], description="The features of processing personal data." ) flexible_legal_basis_for_processing: Optional[bool] = Field( + default=None, description="Whether the legal basis for processing is 'flexible' (i.e. can be overridden in a privacy notice) for this declaration.", ) legal_basis_for_processing: Optional[LegalBasisForProcessingEnum] = Field( - description="The legal basis under which personal data is processed for this purpose." + default=None, + description="The legal basis under which personal data is processed for this purpose.", ) impact_assessment_location: Optional[str] = Field( - description="Where the legitimate interest impact assessment is stored" + default=None, + description="Where the legitimate interest impact assessment is stored", ) retention_period: Optional[str] = Field( - description="An optional string to describe the time period for which data is retained for this purpose." + default=None, + description="An optional string to describe the time period for which data is retained for this purpose.", ) processes_special_category_data: bool = Field( default=False, description="This system processes special category data", ) special_category_legal_basis: Optional[SpecialCategoryLegalBasisEnum] = Field( + default=None, description="The legal basis under which the special category data is processed.", ) data_shared_with_third_parties: bool = Field( @@ -970,13 +980,15 @@ class PrivacyDeclaration(BaseModel): description="This system shares data with third parties for this purpose.", ) third_parties: Optional[str] = Field( + default=None, description="The types of third parties the data is shared with.", ) shared_categories: List[str] = Field( - default_factory=list, + default=[], description="The categories of personal data that this system shares with third parties.", ) cookies: Optional[List[Cookies]] = Field( + default=[], description="Cookies associated with this data use to deliver services and functionality", ) model_config = ConfigDict(from_attributes=True) @@ -1080,10 +1092,10 @@ class System(FidesModel): description="Deprecated. The responsibility or role over the system that processes personal data", ) egress: Optional[List[DataFlow]] = Field( - description="The resources to which the system sends data." + default=None, description="The resources to which the system sends data." ) ingress: Optional[List[DataFlow]] = Field( - description="The resources from which the system receives data." + default=None, description="The resources from which the system receives data." ) privacy_declarations: List[PrivacyDeclaration] = Field( description=PrivacyDeclaration.__doc__, @@ -1113,10 +1125,11 @@ class System(FidesModel): description="The unique identifier for the vendor that's associated with this system.", ) previous_vendor_id: Optional[str] = Field( - description="If specified, the unique identifier for the vendor that was previously associated with this system." + default=None, + description="If specified, the unique identifier for the vendor that was previously associated with this system.", ) dataset_references: List[FidesKey] = Field( - default_factory=list, + default=[], description="Referenced Dataset fides keys used by the system.", ) processes_personal_data: bool = Field( @@ -1136,7 +1149,7 @@ class System(FidesModel): description="Whether the vendor uses data to profile a consumer in a way that has a legal effect.", ) legal_basis_for_profiling: List[LegalBasisForProfilingEnum] = Field( - default_factory=list, + default=[], description="The legal basis (or bases) for performing profiling that has a legal effect.", ) does_international_transfers: bool = Field( @@ -1171,7 +1184,7 @@ class System(FidesModel): description="The legal address for the business represented by the system.", ) responsibility: List[DataResponsibilityTitle] = Field( - default_factory=list, + default=[], description=DataResponsibilityTitle.__doc__, ) dpo: Optional[str] = Field( @@ -1199,9 +1212,11 @@ class System(FidesModel): description="Whether the system uses non-cookie methods of storage or accessing information stored on a user's device.", ) legitimate_interest_disclosure_url: Optional[AnyUrl] = Field( - description="A URL that points to the system's publicly accessible legitimate interest disclosure." + default=None, + description="A URL that points to the system's publicly accessible legitimate interest disclosure.", ) cookies: Optional[List[Cookies]] = Field( + default=None, description="System-level cookies unassociated with a data use to deliver services and functionality", ) diff --git a/tests/fideslang/test_models.py b/tests/fideslang/test_models.py index aa81d86d..99347be4 100644 --- a/tests/fideslang/test_models.py +++ b/tests/fideslang/test_models.py @@ -68,11 +68,7 @@ def test_dataflow_invalid_type(self) -> None: class TestPrivacyDeclaration: def test_privacydeclaration_valid(self) -> None: assert PrivacyDeclaration( - data_categories=[], - data_subjects=[], data_use="provide", - egress=[], - ingress=[], name="declaration-name", ) diff --git a/tests/fideslang/test_validation.py b/tests/fideslang/test_validation.py index 4062da7b..145c3a65 100644 --- a/tests/fideslang/test_validation.py +++ b/tests/fideslang/test_validation.py @@ -22,8 +22,8 @@ System, ) from fideslang.validation import ( - FidesKey, FidesValidationError, + validate_fides_key, valid_data_type, ) @@ -512,12 +512,12 @@ def test_valid_country_identifier(country_code: str): @pytest.mark.unit def test_fides_key_validate_bad_key(): with pytest.raises(FidesValidationError): - FidesKey.validate("hi!") + validate_fides_key("hi!") @pytest.mark.unit def test_fides_key_validate_good_key(): - FidesKey.validate("hello_test_file.txt") + validate_fides_key("hello_test_file.txt") @pytest.mark.unit @@ -715,9 +715,10 @@ def test_data_categories_at_object_level(self): ), fields=[DatasetField(name="nested_field")], ) - assert "Object field 'test_field' cannot have specified data_categories" in str( - exc - ) + assert ( + "Object field 'test_field' cannot have specified data_categories" + in str(exc) + ) def test_object_field_conflicting_types(self): with pytest.raises(ValidationError) as exc: @@ -735,10 +736,10 @@ def test_object_field_conflicting_types(self): ), fields=[DatasetField(name="nested_field")], ) - assert ( - "The data type 'string' on field 'test_field' is not compatible with specified sub-fields." - in str(exc) - ) + assert ( + "The data type 'string' on field 'test_field' is not compatible with specified sub-fields." + in str(exc) + ) def test_data_categories_on_nested_fields(self): DatasetField( From bfb8d3df24addcde114f2a99e021ee439f7b8378 Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 14 Nov 2023 00:04:54 +0800 Subject: [PATCH 11/27] fix more errors --- src/fideslang/models.py | 43 ++++++++++++++++++++++--------------- src/fideslang/validation.py | 9 ++++---- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index bdfed112..1c1e5afb 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -9,27 +9,26 @@ from typing import Dict, List, Optional, Union from warnings import warn -from packaging.version import Version, InvalidVersion +from packaging.version import InvalidVersion, Version from pydantic import ( - field_validator, - model_validator, - ConfigDict, AnyUrl, - ValidationError, BaseModel, + ConfigDict, Field, - HttpUrl, FieldValidationInfo, + HttpUrl, PositiveInt, + field_validator, + model_validator, ) from fideslang.validation import ( + FidesCollectionKey, FidesKey, + FidesValidationError, check_valid_country_code, deprecated_version_later_than_added, has_versioning_if_default, - FidesValidationError, - FidesCollectionKey, matching_parent_key, no_self_reference, parse_data_type_string, @@ -493,7 +492,6 @@ class FidesMeta(BaseModel): @field_validator("data_type") @classmethod - @classmethod def valid_data_type(cls, value: Optional[str]) -> Optional[str]: """Validate that all annotated data types exist in the taxonomy""" return valid_data_type(value) @@ -598,16 +596,18 @@ class DatasetCollection(FidesopsMetaBackwardsCompat): name: str = name_field description: Optional[str] = description_field data_categories: Optional[List[FidesKey]] = Field( + default=None, description="Array of Data Category resources identified by `fides_key`, that apply to all fields in the collection.", ) retention: Optional[str] = Field( + default=None, description="An optional string to describe the retention policy for a Dataset collection. This field can also be applied more granularly at the field level of a Dataset.", ) fields: List[DatasetField] = Field( description="An array of objects that describe the collection's fields.", ) - fides_meta: Optional[CollectionMeta] = None + fides_meta: Optional[CollectionMeta] = Field(default=None) _sort_fields = field_validator("fields")(sort_list_objects_by_name) _unique_items_in_list = field_validator("fields")(unique_items_in_list) @@ -658,18 +658,23 @@ class Dataset(FidesModel, FidesopsMetaBackwardsCompat): meta: Optional[Dict] = meta_field data_categories: Optional[List[FidesKey]] = Field( + default=None, description="Array of Data Category resources identified by `fides_key`, that apply to all collections in the Dataset.", ) fides_meta: Optional[DatasetMetadata] = Field( - description=DatasetMetadata.__doc__, default=None + default=None, + description=DatasetMetadata.__doc__, ) joint_controller: Optional[ContactDetails] = Field( + default=None, description="Deprecated. " + (ContactDetails.__doc__ or ""), ) retention: Optional[str] = Field( + default=None, description="Deprecated. An optional string to describe the retention policy for a dataset. This field can also be applied more granularly at either the Collection or field level of a Dataset.", ) third_country_transfers: Optional[List[str]] = Field( + default=None, description="Deprecated. An optional array to identify any third countries where data is transited to. For consistency purposes, these fields are required to follow the Alpha-3 code set in [ISO 3166-1](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3).", ) collections: List[DatasetCollection] = Field( @@ -1002,13 +1007,16 @@ class SystemMetadata(BaseModel): """ resource_id: Optional[str] = Field( - description="The external resource id for the system being modeled." + default=None, + description="The external resource id for the system being modeled.", ) endpoint_address: Optional[str] = Field( - description="The host of the external resource for the system being modeled." + default=None, + description="The host of the external resource for the system being modeled.", ) endpoint_port: Optional[str] = Field( - description="The port of the external resource for the system being modeled." + default=None, + description="The port of the external resource for the system being modeled.", ) @@ -1128,8 +1136,8 @@ class System(FidesModel): default=None, description="If specified, the unique identifier for the vendor that was previously associated with this system.", ) - dataset_references: List[FidesKey] = Field( - default=[], + dataset_references: Optional[List[FidesKey]] = Field( + default=None, description="Referenced Dataset fides keys used by the system.", ) processes_personal_data: bool = Field( @@ -1198,7 +1206,8 @@ class System(FidesModel): default=None, description="The data security practices employed by this system." ) cookie_max_age_seconds: Optional[int] = Field( - description="The maximum storage duration, in seconds, for cookies used by this system." + default=None, + description="The maximum storage duration, in seconds, for cookies used by this system.", ) uses_cookies: bool = Field( default=False, description="Whether this system uses cookie storage." diff --git a/src/fideslang/validation.py b/src/fideslang/validation.py index 5d6e9050..5e59e158 100644 --- a/src/fideslang/validation.py +++ b/src/fideslang/validation.py @@ -3,11 +3,10 @@ """ import re from collections import Counter -from typing import Dict, List, Optional, Set, Tuple, Annotated, Pattern, Any +from typing import Annotated, Dict, List, Optional, Pattern, Set, Tuple from packaging.version import Version -from pydantic import FieldValidationInfo, GetCoreSchemaHandler -from pydantic_core import CoreSchema, core_schema +from pydantic import FieldValidationInfo from pydantic.functional_validators import PlainValidator from fideslang.default_fixtures import COUNTRY_CODES @@ -98,7 +97,7 @@ def no_self_reference(value: FidesKey, info: FieldValidationInfo) -> FidesKey: def deprecated_version_later_than_added( version_deprecated: Version, version_added: Optional[str] -): +) -> None: """ Check to make sure that the deprecated version is later than the added version. @@ -123,7 +122,7 @@ def has_versioning_if_default( version_added: Optional[str], version_deprecated: Optional[str], replaced_by: Optional[str], -): +) -> None: """ Check to make sure that version fields are set for default items. """ From 25e5b8030e8c25a03991366060c0572465605e4f Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 14 Nov 2023 15:13:33 +0800 Subject: [PATCH 12/27] fix parent key validation --- src/fideslang/models.py | 26 ++++- src/fideslang/validation.py | 31 +++-- tests/fideslang/test_validation.py | 179 +++++++++++++---------------- 3 files changed, 113 insertions(+), 123 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 1c1e5afb..e278f7b2 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -41,8 +41,6 @@ country_code_validator = field_validator("third_country_transfers")( check_valid_country_code ) -matching_parent_key_validator = field_validator("parent_key")(matching_parent_key) -no_self_reference_validator = field_validator("parent_key")(no_self_reference) # Reusable Fields name_field = Field(description="Human-Readable name for this resource.") @@ -274,8 +272,16 @@ class DataCategory(FidesModel, DefaultModel): parent_key: Optional[FidesKey] = None - _matching_parent_key = matching_parent_key_validator - _no_self_reference = no_self_reference_validator + @model_validator(mode="after") + def parent_key_checks(self) -> "DataCategory": + """Verify that the parent key is valid.""" + fides_key = self.fides_key + parent_key = self.parent_key + + no_self_reference(parent_key=parent_key, fides_key=fides_key) + matching_parent_key(parent_key=parent_key, fides_key=fides_key) + + return self class Cookies(BaseModel): @@ -356,8 +362,16 @@ class DataUse(FidesModel, DefaultModel): description="Deprecated. A url pointing to the legitimate interest impact assessment. Required if the legal bases used is legitimate interest.", ) - _matching_parent_key = matching_parent_key_validator - _no_self_reference = no_self_reference_validator + @model_validator(mode="after") + def parent_key_checks(self) -> "DataUse": + """Verify that the parent key is valid.""" + fides_key = self.fides_key + parent_key = self.parent_key + + no_self_reference(parent_key=parent_key, fides_key=fides_key) + matching_parent_key(parent_key=parent_key, fides_key=fides_key) + + return self @model_validator(mode="before") @classmethod diff --git a/src/fideslang/validation.py b/src/fideslang/validation.py index 5e59e158..be08587a 100644 --- a/src/fideslang/validation.py +++ b/src/fideslang/validation.py @@ -6,7 +6,6 @@ from typing import Annotated, Dict, List, Optional, Pattern, Set, Tuple from packaging.version import Version -from pydantic import FieldValidationInfo from pydantic.functional_validators import PlainValidator from fideslang.default_fixtures import COUNTRY_CODES @@ -82,19 +81,6 @@ def unique_items_in_list(values: List) -> List: return values -def no_self_reference(value: FidesKey, info: FieldValidationInfo) -> FidesKey: - """ - Check to make sure that the fides_key doesn't match other fides_key - references within an object. - - i.e. DataCategory.parent_key != DataCategory.fides_key - """ - fides_key = FidesKey(info.data.get("fides_key", "")) - if value == fides_key: - raise FidesValidationError("FidesKey can not self-reference!") - return value - - def deprecated_version_later_than_added( version_deprecated: Version, version_added: Optional[str] ) -> None: @@ -145,17 +131,27 @@ def has_versioning_if_default( ) -def matching_parent_key(parent_key: FidesKey, info: FieldValidationInfo) -> FidesKey: +def no_self_reference(parent_key: Optional[str], fides_key: str) -> None: + """ + Check to make sure that the fides_key doesn't match other fides_key + references within an object. + + i.e. DataCategory.parent_key != DataCategory.fides_key + """ + if parent_key == fides_key: + raise FidesValidationError("FidesKey can not self-reference!") + + +def matching_parent_key(parent_key: Optional[str], fides_key: str) -> None: """ Confirm that the parent_key matches the parent parsed from the FidesKey. """ - fides_key = FidesKey(info.data.get("fides_key", "")) split_fides_key = str(fides_key).split(".") # Check if it is a top-level resource if len(split_fides_key) == 1 and not parent_key: - return parent_key + return # Reform the parent_key from the fides_key and compare parent_key_from_fides_key = ".".join(split_fides_key[:-1]) @@ -165,7 +161,6 @@ def matching_parent_key(parent_key: FidesKey, info: FieldValidationInfo) -> Fide parent_key, parent_key_from_fides_key, fides_key ) ) - return parent_key def check_valid_country_code(country_code_list: List) -> List: diff --git a/tests/fideslang/test_validation.py b/tests/fideslang/test_validation.py index 145c3a65..322a96a6 100644 --- a/tests/fideslang/test_validation.py +++ b/tests/fideslang/test_validation.py @@ -248,139 +248,120 @@ def test_dataset_duplicate_collections_error(): @pytest.mark.unit -def test_top_level_resource(): - DataCategory( - organization_fides_key="1", - fides_key="user", - name="Custom Test Data", - description="Custom Test Data Category", - ) - assert DataCategory - - -@pytest.mark.unit -def test_fides_key_doesnt_match_stated_parent_key(): - with pytest.raises(ValidationError): +class TestFidesKeyValidation: + def test_top_level_resource(self): DataCategory( organization_fides_key="1", - fides_key="user.custom_test_data", + fides_key="user", name="Custom Test Data", description="Custom Test Data Category", - parent_key="user.account", ) - assert DataCategory - - -@pytest.mark.unit -def test_fides_key_matches_stated_parent_key(): - DataCategory( - organization_fides_key="1", - fides_key="user.account.custom_test_data", - name="Custom Test Data", - description="Custom Test Data Category", - parent_key="user.account", - ) - assert DataCategory + assert DataCategory + def test_fides_key_doesnt_match_stated_parent_key(self): + with pytest.raises(ValidationError): + DataCategory( + organization_fides_key="1", + fides_key="user.custom_test_data", + name="Custom Test Data", + description="Custom Test Data Category", + parent_key="user.account", + ) + assert DataCategory -@pytest.mark.unit -def test_no_parent_key_but_fides_key_contains_parent_key(): - with pytest.raises(ValidationError): + def test_fides_key_matches_stated_parent_key(self): DataCategory( organization_fides_key="1", - fides_key="user.custom_test_data", + fides_key="user.account.custom_test_data", name="Custom Test Data", description="Custom Test Data Category", + parent_key="user.account", ) - assert DataCategory - - -@pytest.mark.unit -def test_fides_key_with_carets(): - DataCategory( - organization_fides_key="1", - fides_key="", - name="Example valid key with brackets", - description="This key contains a <> which is valid", - ) - assert DataCategory + assert DataCategory + def test_no_parent_key_but_fides_key_contains_parent_key(self): + with pytest.raises(ValidationError): + DataCategory( + organization_fides_key="1", + fides_key="user.custom_test_data", + name="Custom Test Data", + description="Custom Test Data Category", + ) + assert DataCategory -@pytest.mark.unit -def test_invalid_chars_in_fides_key(): - with pytest.raises(ValidationError): + def test_fides_key_with_carets(self): DataCategory( organization_fides_key="1", - fides_key="!", - name="Example invalid key", - description="This key contains a ! so it is invalid", + fides_key="", + name="Example valid key with brackets", + description="This key contains a <> which is valid", ) - assert DataCategory - - -@pytest.mark.unit -def test_create_valid_data_category(): - DataCategory( - organization_fides_key="1", - fides_key="user.custom_test_data", - name="Custom Test Data", - description="Custom Test Data Category", - parent_key="user", - ) - assert DataCategory + assert DataCategory + def test_invalid_chars_in_fides_key(self): + with pytest.raises(ValidationError): + DataCategory( + organization_fides_key="1", + fides_key="!", + name="Example invalid key", + description="This key contains a ! so it is invalid", + ) + assert DataCategory -@pytest.mark.unit -def test_circular_dependency_data_category(): - with pytest.raises(ValidationError): + def test_create_valid_data_category(self): DataCategory( organization_fides_key="1", - fides_key="user", - name="User Data", - description="Test Data Category", + fides_key="user.custom_test_data", + name="Custom Test Data", + description="Custom Test Data Category", parent_key="user", ) - assert True - - -@pytest.mark.unit -def test_create_valid_data_use(): - DataUse( - organization_fides_key="1", - fides_key="provide.service", - name="Provide the Product or Service", - parent_key="provide", - description="Test Data Use", - ) - assert True + assert DataCategory + def test_circular_dependency_data_category(self): + with pytest.raises(ValidationError): + DataCategory( + organization_fides_key="1", + fides_key="user", + name="User Data", + description="Test Data Category", + parent_key="user", + ) + assert True -@pytest.mark.unit -def test_circular_dependency_data_use(): - with pytest.raises(ValidationError): + def test_create_valid_data_use(self): DataUse( organization_fides_key="1", fides_key="provide.service", name="Provide the Product or Service", + parent_key="provide", description="Test Data Use", - parent_key="provide.service", ) - assert True + assert True + def test_circular_dependency_data_use(self): + with pytest.raises(ValidationError): + DataUse( + organization_fides_key="1", + fides_key="provide.service", + name="Provide the Product or Service", + description="Test Data Use", + parent_key="provide.service", + ) + assert True -@pytest.mark.unit -@pytest.mark.parametrize("fides_key", ["foo_bar", "foo-bar", "foo.bar", "foo_bar_8"]) -def test_fides_model_valid(fides_key: str): - fides_key = FidesModel(fides_key=fides_key, name="Foo Bar") - assert fides_key - + @pytest.mark.parametrize( + "fides_key", ["foo_bar", "foo-bar", "foo.bar", "foo_bar_8"] + ) + def test_fides_model_valid(self, fides_key: str): + fides_model = FidesModel(fides_key=fides_key, name="Foo Bar") + assert fides_model -@pytest.mark.unit -@pytest.mark.parametrize("fides_key", ["foo/bar", "foo%bar", "foo^bar"]) -def test_fides_model_fides_key_invalid(fides_key): - """Check for a bunch of different possible bad characters here.""" - with pytest.raises(ValidationError): - FidesModel(fides_key=fides_key) + @pytest.mark.parametrize("fides_key", ["foo/bar", "foo%bar", "foo^bar"]) + def test_fides_model_fides_key_invalid(self, fides_key: str): + """Check for a bunch of different possible bad characters here.""" + with pytest.raises(ValidationError): + FidesModel(fides_key=fides_key) @pytest.mark.unit From 73edf8b95e1c74c1f5cd0d4a9e7c7e0ad0603754 Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 14 Nov 2023 15:22:48 +0800 Subject: [PATCH 13/27] get everything passing (by removing two tests) --- noxfile.py | 2 +- tests/fideslang/test_models.py | 60 ---------------------------------- 2 files changed, 1 insertion(+), 61 deletions(-) diff --git a/noxfile.py b/noxfile.py index d350324e..8b99a4b6 100644 --- a/noxfile.py +++ b/noxfile.py @@ -4,7 +4,7 @@ nox.options.reuse_existing_virtualenvs = True TESTED_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"] -TESTED_PYDANTIC_VERSIONS = ["2.3.0"] +TESTED_PYDANTIC_VERSIONS = ["2.2.1", "2.3.0", "2.4.2", "2.5.0"] TESTED_PYYAML_VERSIONS = ["5.4.1", "6.0"] diff --git a/tests/fideslang/test_models.py b/tests/fideslang/test_models.py index 99347be4..8b1231ca 100644 --- a/tests/fideslang/test_models.py +++ b/tests/fideslang/test_models.py @@ -222,66 +222,6 @@ def test_system_valid_no_egress_or_ingress(self) -> None: tags=["some", "tags"], ) - def test_system_no_egress(self) -> None: - with raises(ValueError): - assert System( - description="Test Policy", - fides_key="test_system", - ingress=[ - DataFlow( - fides_key="test_system_3", - type="system", - data_categories=[], - ) - ], - meta={"some": "meta stuff"}, - name="Test System", - organization_fides_key="1", - privacy_declarations=[ - PrivacyDeclaration( - data_categories=[], - data_subjects=[], - data_use="provide", - egress=["test_system_2"], - ingress=["test_system_3"], - name="declaration-name", - ) - ], - registry_id="1", - system_type="SYSTEM", - tags=["some", "tags"], - ) - - def test_system_no_ingress(self) -> None: - with raises(ValueError): - assert System( - description="Test Policy", - egress=[ - DataFlow( - fides_key="test_system_2", - type="system", - data_categories=[], - ) - ], - fides_key="test_system", - meta={"some": "meta stuff"}, - name="Test System", - organization_fides_key="1", - privacy_declarations=[ - PrivacyDeclaration( - data_categories=[], - data_subjects=[], - data_use="provide", - egress=["test_system_2"], - ingress=["test_system_3"], - name="declaration-name", - ) - ], - registry_id="1", - system_type="SYSTEM", - tags=["some", "tags"], - ) - def test_system_user_ingress_valid(self) -> None: assert System( description="Test Policy", From 126e4befac59467f2e08654428476acd8893c755 Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 14 Nov 2023 15:24:21 +0800 Subject: [PATCH 14/27] feat: update CI checks for new pydantic versions --- .github/workflows/pr_checks.yml | 2 +- noxfile.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index 17a9988d..ecb44b2e 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -94,7 +94,7 @@ jobs: strategy: matrix: python_version: ["3.8", "3.9", "3.10", "3.11"] - pydantic_version: ["2.3.0"] + pydantic_version: ["2.2.1", "2.3.0", "2.4.2", "2.5.0"] pyyaml_version: ["5.4.1", "6.0"] runs-on: ubuntu-latest continue-on-error: true diff --git a/noxfile.py b/noxfile.py index 8b99a4b6..f2e5a258 100644 --- a/noxfile.py +++ b/noxfile.py @@ -3,6 +3,7 @@ nox.options.sessions = [] nox.options.reuse_existing_virtualenvs = True +# These should match what is in the `pr_checks.yml` file for CI runs TESTED_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"] TESTED_PYDANTIC_VERSIONS = ["2.2.1", "2.3.0", "2.4.2", "2.5.0"] TESTED_PYYAML_VERSIONS = ["5.4.1", "6.0"] From cc4d782b5ee201fe7546903a46506866a5afb86c Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 14 Nov 2023 15:30:11 +0800 Subject: [PATCH 15/27] update requirements file, remove python 3.8 and add 3.12 to supported versions --- .github/workflows/pr_checks.yml | 2 +- Dockerfile | 2 +- noxfile.py | 2 +- requirements.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index ecb44b2e..bad5ab3c 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -93,7 +93,7 @@ jobs: Pytest-Matrix: strategy: matrix: - python_version: ["3.8", "3.9", "3.10", "3.11"] + python_version: ["3.9", "3.10", "3.11", "3.12"] pydantic_version: ["2.2.1", "2.3.0", "2.4.2", "2.5.0"] pyyaml_version: ["5.4.1", "6.0"] runs-on: ubuntu-latest diff --git a/Dockerfile b/Dockerfile index 87c40171..9984e581 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8-slim-bullseye as base +FROM python:3.10-slim-bullseye as base # Update pip in the base image since we'll use it everywhere RUN pip install -U pip diff --git a/noxfile.py b/noxfile.py index f2e5a258..95bc8217 100644 --- a/noxfile.py +++ b/noxfile.py @@ -4,7 +4,7 @@ nox.options.reuse_existing_virtualenvs = True # These should match what is in the `pr_checks.yml` file for CI runs -TESTED_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"] +TESTED_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12"] TESTED_PYDANTIC_VERSIONS = ["2.2.1", "2.3.0", "2.4.2", "2.5.0"] TESTED_PYYAML_VERSIONS = ["5.4.1", "6.0"] diff --git a/requirements.txt b/requirements.txt index 1866b067..82fdc338 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -pydantic>=2.0.3,<=2.3.0 +pydantic>=2.2.1,<=2.6.0 pyyaml>=5,<7 packaging>=20.0 From ba08cf13ae96cff452f553f4180eb8022896451f Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 14 Nov 2023 15:40:12 +0800 Subject: [PATCH 16/27] remove python 3.12 from the matrix --- .github/workflows/pr_checks.yml | 4 ++-- Dockerfile | 2 +- dev-requirements.txt | 2 +- noxfile.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index bad5ab3c..0ab63d87 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -93,9 +93,9 @@ jobs: Pytest-Matrix: strategy: matrix: - python_version: ["3.9", "3.10", "3.11", "3.12"] + python_version: ["3.9", "3.10", "3.11"] pydantic_version: ["2.2.1", "2.3.0", "2.4.2", "2.5.0"] - pyyaml_version: ["5.4.1", "6.0"] + pyyaml_version: ["5.4.1", "6.0.1"] runs-on: ubuntu-latest continue-on-error: true steps: diff --git a/Dockerfile b/Dockerfile index 9984e581..5b0aa15c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10-slim-bullseye as base +FROM python:3.9-slim-bullseye as base # Update pip in the base image since we'll use it everywhere RUN pip install -U pip diff --git a/dev-requirements.txt b/dev-requirements.txt index f91fc28a..3bdcd709 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,7 +2,7 @@ black==23.3.0 mypy==1.4.0 nox>=2023 packaging>=22.0 -pre-commit==2.9.3 +pre-commit==3.5.0 pylint==2.10.0 pytest==7.3.1 pytest-cov==2.11.1 diff --git a/noxfile.py b/noxfile.py index 95bc8217..f14f82c4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -4,9 +4,9 @@ nox.options.reuse_existing_virtualenvs = True # These should match what is in the `pr_checks.yml` file for CI runs -TESTED_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12"] +TESTED_PYTHON_VERSIONS = ["3.9", "3.10", "3.11"] TESTED_PYDANTIC_VERSIONS = ["2.2.1", "2.3.0", "2.4.2", "2.5.0"] -TESTED_PYYAML_VERSIONS = ["5.4.1", "6.0"] +TESTED_PYYAML_VERSIONS = ["5.4.1", "6.0.1"] def install_requirements(session: nox.Session) -> None: From 12ef6a858713d6874f7646859e9591ee54b7a800 Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 14 Nov 2023 15:51:03 +0800 Subject: [PATCH 17/27] fix static checks --- src/fideslang/models.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index e278f7b2..e6934e1b 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -15,7 +15,7 @@ BaseModel, ConfigDict, Field, - FieldValidationInfo, + ValidationInfo, HttpUrl, PositiveInt, field_validator, @@ -396,7 +396,7 @@ def deprecate_fields(cls, values: Dict) -> Dict: @field_validator("legitimate_interest") @classmethod - def set_legitimate_interest(cls, value: bool, info: FieldValidationInfo) -> bool: + def set_legitimate_interest(cls, value: bool, info: ValidationInfo) -> bool: """Sets if a legitimate interest is used.""" values = info.data @@ -406,9 +406,7 @@ def set_legitimate_interest(cls, value: bool, info: FieldValidationInfo) -> bool @field_validator("legitimate_interest_impact_assessment") @classmethod - def ensure_impact_assessment( - cls, value: AnyUrl, info: FieldValidationInfo - ) -> AnyUrl: + def ensure_impact_assessment(cls, value: AnyUrl, info: ValidationInfo) -> AnyUrl: """ Validates an impact assessment is applied if a legitimate interest has been defined. @@ -561,7 +559,7 @@ def valid_meta(cls, meta_values: Optional[FidesMeta]) -> Optional[FidesMeta]: def validate_object_fields( cls, fields: Optional[List["DatasetField"]], - info: FieldValidationInfo, + info: ValidationInfo, ) -> Optional[List["DatasetField"]]: """Two validation checks for object fields: - If there are sub-fields specified, type should be either empty or 'object' @@ -1274,7 +1272,7 @@ def deprecate_fields(cls, values: Dict) -> Dict: def privacy_declarations_reference_data_flows( cls, value: PrivacyDeclaration, - info: FieldValidationInfo, + info: ValidationInfo, ) -> PrivacyDeclaration: """ Any `PrivacyDeclaration`s which include `egress` and/or `ingress` fields must From 63d279507cb89b0d4e816b9cd353e8c745c60f78 Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 14 Nov 2023 16:37:52 +0800 Subject: [PATCH 18/27] docs: changelog --- CHANGELOG.md | 9 ++++++++- pyproject.toml | 3 +-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 695be673..f216ab88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,9 +16,16 @@ The types of changes are: ## [Unreleased](https://github.com/ethyca/fideslang/compare/2.2.1...main) +## [3.0.0 Unreleased] + +### Changed + +- Updated to Pydantic 2.X, which is now the only supported version [#160](https://github.com/ethyca/fideslang/pull/160) +- Removed Python 3.8 from the list of supported versions [#160](https://github.com/ethyca/fideslang/pull/160) + ## [2.2.1](https://github.com/ethyca/fideslang/compare/2.2.0...2.2.1) -### Added +### Added - Added a `System.cookies` property to support `Cookie` records explicitly associated with a `System` generally [#181](https://github.com/ethyca/fideslang/pull/181) - Added a `System.previous_vendor_id` property to support to associate a `System` record with a "deprecated" vendor record [#182](https://github.com/ethyca/fideslang/pull/182) diff --git a/pyproject.toml b/pyproject.toml index 47a26082..c9befed7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,13 +9,12 @@ name = "fideslang" description = "Fides Taxonomy Language" dynamic = ["dependencies", "version"] readme = "README.md" -requires-python = ">=3.8, <4" +requires-python = ">=3.9, <4" authors = [{ name = "Ethyca, Inc.", email = "fidesteam@ethyca.com" }] license = { text = "Apache License 2.0" } classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", From 40d4faa58348dafe48af1da42faa2c2a7b18fd4f Mon Sep 17 00:00:00 2001 From: Thomas Date: Mon, 27 Nov 2023 21:49:38 +0800 Subject: [PATCH 19/27] re-add tests and fix privacy declaration checks --- src/fideslang/models.py | 38 ++++++++--------- tests/fideslang/test_models.py | 76 +++++++++++++++++++++++++++++++++- 2 files changed, 92 insertions(+), 22 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index e6934e1b..ea40f907 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -1267,33 +1267,31 @@ def deprecate_fields(cls, values: Dict) -> Dict: ) return values - @field_validator("privacy_declarations", check_fields=True) - @classmethod - def privacy_declarations_reference_data_flows( - cls, - value: PrivacyDeclaration, - info: ValidationInfo, - ) -> PrivacyDeclaration: + @model_validator(mode="after") + def verify_privacy_declarations(self) -> "System": """ Any `PrivacyDeclaration`s which include `egress` and/or `ingress` fields must only reference the `fides_key`s of defined `DataFlow`s in said field(s). """ - for direction in ["egress", "ingress"]: - fides_keys = getattr(value, direction, None) - if fides_keys is not None: - data_flows = info.data[direction] - system = info.data["fides_key"] - assert ( - data_flows is not None and len(data_flows) > 0 - ), f"PrivacyDeclaration '{value.name}' defines {direction} with one or more resources and is applied to the System '{system}', which does not itself define any {direction}." + if self.privacy_declarations: + for declaration in self.privacy_declarations: + for direction in ["egress", "ingress"]: + flow_fides_keys = getattr(declaration, direction, None) + if flow_fides_keys is not None: + data_flows = getattr(self, direction) + system = self.fides_key - for fides_key in fides_keys: - assert fides_key in [ - data_flow.fides_key for data_flow in data_flows - ], f"PrivacyDeclaration '{value.name}' defines {direction} with '{fides_key}' and is applied to the System '{system}', which does not itself define {direction} with that resource." + assert ( + data_flows is not None and len(data_flows) > 0 + ), f"PrivacyDeclaration '{declaration.name}' defines {direction} with one or more resources and is applied to the System '{system}', which does not itself define any {direction}." - return value + for fides_key in flow_fides_keys: + assert fides_key in [ + data_flow.fides_key for data_flow in data_flows + ], f"PrivacyDeclaration '{declaration.name}' defines {direction} with '{fides_key}' and is applied to the System '{system}', which does not itself define {direction} with that resource." + + return self model_config = ConfigDict(use_enum_values=True) diff --git a/tests/fideslang/test_models.py b/tests/fideslang/test_models.py index 8b1231ca..c6519971 100644 --- a/tests/fideslang/test_models.py +++ b/tests/fideslang/test_models.py @@ -196,13 +196,17 @@ def test_system_valid_no_meta(self) -> None: name="declaration-name", ) ], - registry_id="1", + registry_id=1, system_type="SYSTEM", tags=["some", "tags"], ) assert system.meta == None def test_system_valid_no_egress_or_ingress(self) -> None: + """ + If there are no ingress/egress at the System level, as well as + none at the PrivacyDeclaration level, it is valid. + """ assert System( description="Test Policy", fides_key="test_system", @@ -217,11 +221,79 @@ def test_system_valid_no_egress_or_ingress(self) -> None: name="declaration-name", ) ], - registry_id="1", + registry_id=1, system_type="SYSTEM", tags=["some", "tags"], ) + def test_system_no_egress(self) -> None: + """ + If there is an ingress or egress at the PrivacyDeclaration level that + isn't at the system level, we should get a validation error. + """ + with raises(ValueError): + assert System( + description="Test Policy", + fides_key="test_system", + ingress=[ + DataFlow( + fides_key="test_system_3", + type="system", + data_categories=[], + ) + ], + meta={"some": "meta stuff"}, + name="Test System", + organization_fides_key="1", + privacy_declarations=[ + PrivacyDeclaration( + data_categories=[], + data_subjects=[], + data_use="provide", + egress=["test_system_2"], + ingress=["test_system_3"], + name="declaration-name", + ) + ], + registry_id=1, + system_type="SYSTEM", + tags=["some", "tags"], + ) + + def test_system_no_ingress(self) -> None: + """ + If there is an ingress or egress at the PrivacyDeclaration level that + isn't at the system level, we should get a validation error. + """ + with raises(ValueError): + assert System( + description="Test Policy", + egress=[ + DataFlow( + fides_key="test_system_2", + type="system", + data_categories=[], + ) + ], + fides_key="test_system", + meta={"some": "meta stuff"}, + name="Test System", + organization_fides_key="1", + privacy_declarations=[ + PrivacyDeclaration( + data_categories=[], + data_subjects=[], + data_use="provide", + egress=["test_system_2"], + ingress=["test_system_3"], + name="declaration-name", + ) + ], + registry_id=1, + system_type="SYSTEM", + tags=["some", "tags"], + ) + def test_system_user_ingress_valid(self) -> None: assert System( description="Test Policy", From 537537c1225dd3e40ba3d25d33348062fcddc47d Mon Sep 17 00:00:00 2001 From: Thomas Date: Mon, 27 Nov 2023 22:08:44 +0800 Subject: [PATCH 20/27] fix flexible default test --- src/fideslang/models.py | 6 +++--- tests/fideslang/test_models.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 9e971fe8..77e4b63c 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -283,6 +283,7 @@ def parent_key_checks(self) -> "DataCategory": return self + class Cookies(BaseModel): """The Cookies resource model""" @@ -967,8 +968,8 @@ class PrivacyDeclaration(BaseModel): features: List[str] = Field( default=[], description="The features of processing personal data." ) - flexible_legal_basis_for_processing: Optional[bool] = Field( - default=None, + flexible_legal_basis_for_processing: bool = Field( + default=True, description="Whether the legal basis for processing is 'flexible' (i.e. can be overridden in a privacy notice) for this declaration.", ) legal_basis_for_processing: Optional[LegalBasisForProcessingEnum] = Field( @@ -1010,7 +1011,6 @@ class PrivacyDeclaration(BaseModel): model_config = ConfigDict(from_attributes=True) - class SystemMetadata(BaseModel): """ The SystemMetadata resource model. diff --git a/tests/fideslang/test_models.py b/tests/fideslang/test_models.py index 73b151be..a05bdad2 100644 --- a/tests/fideslang/test_models.py +++ b/tests/fideslang/test_models.py @@ -423,7 +423,6 @@ def test_expanded_system(self): def test_flexible_legal_basis_default(self): pd = PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", ingress=["user"], From 191f6df2e07e2855fef7cc26cbdc6719082f8122 Mon Sep 17 00:00:00 2001 From: Thomas Date: Mon, 27 Nov 2023 22:12:59 +0800 Subject: [PATCH 21/27] fix static checks --- src/fideslang/models.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 77e4b63c..85422090 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -1275,7 +1275,9 @@ def verify_privacy_declarations(self) -> "System": """ if self.privacy_declarations: - for declaration in self.privacy_declarations: + for ( + declaration + ) in self.privacy_declarations: # pylint: disable=not-an-iterable for direction in ["egress", "ingress"]: flow_fides_keys = getattr(declaration, direction, None) if flow_fides_keys is not None: @@ -1286,9 +1288,12 @@ def verify_privacy_declarations(self) -> "System": data_flows is not None and len(data_flows) > 0 ), f"PrivacyDeclaration '{declaration.name}' defines {direction} with one or more resources and is applied to the System '{system}', which does not itself define any {direction}." - for fides_key in flow_fides_keys: + for ( + fides_key + ) in flow_fides_keys: # pylint: disable=not-an-iterable assert fides_key in [ - data_flow.fides_key for data_flow in data_flows + data_flow.fides_key + for data_flow in data_flows # pylint: disable=not-an-iterable ], f"PrivacyDeclaration '{declaration.name}' defines {direction} with '{fides_key}' and is applied to the System '{system}', which does not itself define {direction} with that resource." return self From 9ec615a461fb66d48bf5de655868f7e326407ebe Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 28 Nov 2023 14:18:51 +0800 Subject: [PATCH 22/27] fix the validators on our custom fideskey types to be json schema valid --- src/fideslang/validation.py | 6 +++--- tests/fideslang/test_default_taxonomy.py | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/fideslang/validation.py b/src/fideslang/validation.py index be08587a..b3ca93bf 100644 --- a/src/fideslang/validation.py +++ b/src/fideslang/validation.py @@ -6,7 +6,7 @@ from typing import Annotated, Dict, List, Optional, Pattern, Set, Tuple from packaging.version import Version -from pydantic.functional_validators import PlainValidator +from pydantic.functional_validators import BeforeValidator from fideslang.default_fixtures import COUNTRY_CODES @@ -30,7 +30,7 @@ def validate_fides_key(value: str) -> str: return value -FidesKey = Annotated[str, PlainValidator(validate_fides_key)] +FidesKey = Annotated[str, BeforeValidator(validate_fides_key)] def validate_collection_key_parts(value: str) -> str: @@ -50,7 +50,7 @@ def validate_collection_key_parts(value: str) -> str: # Dataset.Collection name where both dataset and collection names are valid FidesKeys -FidesCollectionKey = Annotated[str, PlainValidator(validate_collection_key_parts)] +FidesCollectionKey = Annotated[str, BeforeValidator(validate_collection_key_parts)] def sort_list_objects_by_name(values: List) -> List: diff --git a/tests/fideslang/test_default_taxonomy.py b/tests/fideslang/test_default_taxonomy.py index d04f8e9b..be9075b3 100644 --- a/tests/fideslang/test_default_taxonomy.py +++ b/tests/fideslang/test_default_taxonomy.py @@ -25,6 +25,12 @@ def test_taxonomy_count(self, type_and_count: Tuple[str, int]) -> None: def test_are_set_as_default(self, data_type: str) -> None: assert all([x.is_default for x in getattr(DEFAULT_TAXONOMY, data_type)]) + @pytest.mark.parametrize("data_type", taxonomy_counts.keys()) + def test_valid_json_schemas(self, data_type: str) -> None: + # If this fails, an error will get thrown + getattr(DEFAULT_TAXONOMY, data_type)[0].model_json_schema() + assert True + @pytest.mark.parametrize("data_type", taxonomy_counts.keys()) def test_key_uniqueness(self, data_type: str) -> None: keys = [x.fides_key for x in getattr(DEFAULT_TAXONOMY, data_type)] From 43abfb73da46dfb225f3b47799aea912fa3e0f9d Mon Sep 17 00:00:00 2001 From: Thomas Date: Tue, 28 Nov 2023 14:35:35 +0800 Subject: [PATCH 23/27] turned off strict checking for the registry id --- src/fideslang/models.py | 3 ++- tests/conftest.py | 2 +- tests/fideslang/test_validation.py | 7 +++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 85422090..f50f721f 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -1098,6 +1098,7 @@ class System(FidesModel): registry_id: Optional[int] = Field( default=None, description="The id of the system registry, if used.", + strict=False, # This allows Pydantic to coerce '1' -> 1 ) meta: Optional[Dict] = meta_field fidesctl_meta: Optional[SystemMetadata] = Field( @@ -1149,7 +1150,7 @@ class System(FidesModel): description="If specified, the unique identifier for the vendor that was previously associated with this system.", ) dataset_references: Optional[List[FidesKey]] = Field( - default=None, + default=[], description="Referenced Dataset fides keys used by the system.", ) processes_personal_data: bool = Field( diff --git a/tests/conftest.py b/tests/conftest.py index ad8fbc20..f621201c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,7 +95,7 @@ def resources_dict(): ), "system": models.System( organization_fides_key="1", - registryId="1", + registry_id="1", fides_key="test_system", system_type="SYSTEM", name="Test System", diff --git a/tests/fideslang/test_validation.py b/tests/fideslang/test_validation.py index 322a96a6..5f310109 100644 --- a/tests/fideslang/test_validation.py +++ b/tests/fideslang/test_validation.py @@ -404,7 +404,6 @@ def test_valid_policy(): organization_fides_key="1", fides_key="test_policy", name="Test Policy", - version="1.3", description="Test Policy", rules=[], ) @@ -415,7 +414,7 @@ def test_valid_policy(): def test_create_valid_system(): System( organization_fides_key="1", - registryId="1", + registry_id=1, fides_key="test_system", system_type="SYSTEM", name="Test System", @@ -446,7 +445,7 @@ def test_invalid_country_identifier(country_code: str): with pytest.raises(ValidationError): System( organization_fides_key="1", - registryId="1", + registry_id="1", fides_key="test_system", system_type="SYSTEM", name="Test System", @@ -471,7 +470,7 @@ def test_valid_country_identifier(country_code: str): """Validates usage of alpha-3 codes per ISO 3166""" System( organization_fides_key="1", - registryId="1", + registry_id="1", fides_key="test_system", system_type="SYSTEM", name="Test System", From e7de84012b51ed697f3b19b8e30d543d8d1c9b5f Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 30 Nov 2023 12:11:32 +0800 Subject: [PATCH 24/27] add more None defaults to optional types --- src/fideslang/models.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index f50f721f..3d37fedf 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -474,8 +474,8 @@ class FidesMeta(BaseModel): """Supplementary metadata used by the Fides application for additional features.""" references: Optional[List[FidesDatasetReference]] = Field( - description="Fields that current field references or is referenced by. Used for drawing the edges of a DSR graph.", default=None, + description="Fields that current field references or is referenced by. Used for drawing the edges of a DSR graph.", ) identity: Optional[str] = Field( default=None, @@ -923,9 +923,11 @@ class DataProtectionImpactAssessment(BaseModel): description="A boolean value determining if a data protection impact assessment is required. Defaults to False.", ) progress: Optional[str] = Field( + default=None, description="The optional status of a Data Protection Impact Assessment. Returned on an exported data map or RoPA.", ) link: Optional[AnyUrl] = Field( + default=None, description="The optional link to the Data Protection Impact Assessment. Returned on an exported data map or RoPA.", ) @@ -1058,6 +1060,7 @@ class DataFlow(BaseModel): description=f"Specifies the resource model class for which the `fides_key` applies. May be any of {', '.join([member.value for member in FlowableResources])}.", ) data_categories: Optional[List[FidesKey]] = Field( + default=None, description="An array of data categories describing the data in transit.", ) From a66a4ce021a40b2f198672a0484f5cf964c969a7 Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 30 Nov 2023 13:33:26 +0800 Subject: [PATCH 25/27] remove all uses of URL since they might cause issues with database storage in Fides --- src/fideslang/models.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 3d37fedf..32421acf 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -11,12 +11,10 @@ from packaging.version import InvalidVersion, Version from pydantic import ( - AnyUrl, BaseModel, ConfigDict, Field, ValidationInfo, - HttpUrl, PositiveInt, field_validator, model_validator, @@ -357,7 +355,7 @@ class DataUse(FidesModel, DefaultModel): default=None, description="Deprecated. A boolean representation of if the legal basis used is `Legitimate Interest`. Validated at run time and looks for a `legitimate_interest_impact_assessment` to exist if true.", ) - legitimate_interest_impact_assessment: Optional[AnyUrl] = Field( + legitimate_interest_impact_assessment: Optional[str] = Field( default=None, description="Deprecated. A url pointing to the legitimate interest impact assessment. Required if the legal bases used is legitimate interest.", ) @@ -406,7 +404,7 @@ def set_legitimate_interest(cls, value: bool, info: ValidationInfo) -> bool: @field_validator("legitimate_interest_impact_assessment") @classmethod - def ensure_impact_assessment(cls, value: AnyUrl, info: ValidationInfo) -> AnyUrl: + def ensure_impact_assessment(cls, value: str, info: ValidationInfo) -> str: """ Validates an impact assessment is applied if a legitimate interest has been defined. @@ -828,8 +826,8 @@ class Organization(FidesModel): default=None, description=ContactDetails.__doc__, ) - security_policy: Optional[HttpUrl] = Field( - default=None, description="Am optional URL to the organization security policy." + security_policy: Optional[str] = Field( + default=None, description="An optional URL to the organization security policy." ) @@ -926,7 +924,7 @@ class DataProtectionImpactAssessment(BaseModel): default=None, description="The optional status of a Data Protection Impact Assessment. Returned on an exported data map or RoPA.", ) - link: Optional[AnyUrl] = Field( + link: Optional[str] = Field( default=None, description="The optional link to the Data Protection Impact Assessment. Returned on an exported data map or RoPA.", ) @@ -1195,7 +1193,7 @@ class System(FidesModel): default=None, description="The optional status of a Data Protection Impact Assessment", ) - privacy_policy: Optional[AnyUrl] = Field( + privacy_policy: Optional[str] = Field( default=None, description="A URL that points to the System's publicly accessible privacy policy.", ) @@ -1236,7 +1234,7 @@ class System(FidesModel): default=False, description="Whether the system uses non-cookie methods of storage or accessing information stored on a user's device.", ) - legitimate_interest_disclosure_url: Optional[AnyUrl] = Field( + legitimate_interest_disclosure_url: Optional[str] = Field( default=None, description="A URL that points to the system's publicly accessible legitimate interest disclosure.", ) From 557aebfba7ca28bc1bd04e1e8fd42dbe12a7df79 Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 30 Nov 2023 14:47:37 +0800 Subject: [PATCH 26/27] remove a validator on System that caused issues when loading from Orm in Fides --- src/fideslang/models.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 32421acf..59165b4e 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -1249,26 +1249,6 @@ class System(FidesModel): _check_valid_country_code = country_code_validator - @model_validator(mode="before") - @classmethod - def deprecate_fields(cls, values: Dict) -> Dict: - """ - Warn of System fields pending deprecation. - """ - deprecated_fields = [ - "joint_controller", - "third_country_transfers", - "data_responsibility_title", - "data_protection_impact_assessment", - ] - for field in deprecated_fields: - if values.get(field) is not None: - warn( - f"The {field} field is deprecated, and will be removed in a future version of fideslang.", - DeprecationWarning, - ) - return values - @model_validator(mode="after") def verify_privacy_declarations(self) -> "System": """ From b87a2299e742fab96c14fb21b4c5bcc2685f8b62 Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 30 Nov 2023 15:41:45 +0800 Subject: [PATCH 27/27] remove deprecation tests and update model (root) validators --- src/fideslang/models.py | 50 ++-------------- tests/fideslang/gvl/test_gvl.py | 13 ++--- tests/fideslang/test_models.py | 84 ++------------------------- tests/fideslang/test_parse.py | 3 +- tests/fideslang/test_relationships.py | 22 ++----- tests/fideslang/test_validation.py | 32 +++------- 6 files changed, 26 insertions(+), 178 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 59165b4e..670958eb 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -308,19 +308,18 @@ class DataSubjectRights(BaseModel): description="A list of valid data subject rights to be used when applying data rights to a data subject via a strategy.", ) - @model_validator(mode="before") - @classmethod - def include_exclude_has_values(cls, values: Dict) -> Dict: + @model_validator(mode="after") + def include_exclude_has_values(self) -> "DataSubjectRights": """ Validate the if include or exclude is chosen, that at least one value is present. """ - strategy, rights = values.get("strategy"), values.get("values") + strategy, rights = self.strategy, self.values if strategy in ("INCLUDE", "EXCLUDE"): assert ( rights is not None ), f"If {strategy} is chosen, rights must also be listed." - return values + return self class DataSubject(FidesModel, DefaultModel): @@ -371,27 +370,6 @@ def parent_key_checks(self) -> "DataUse": return self - @model_validator(mode="before") - @classmethod - def deprecate_fields(cls, values: Dict) -> Dict: - """ - Warn of Data Use fields pending deprecation. - """ - deprecated_fields = [ - "legal_basis", - "recipients", - "special_category", - "legitimate_interest", - "legitimate_interest_impact_assessment", - ] - for field in deprecated_fields: - if values.get(field) is not None: - warn( - f"The {field} field is deprecated, and will be removed in a future version of fideslang.", - DeprecationWarning, - ) - return values - @field_validator("legitimate_interest") @classmethod def set_legitimate_interest(cls, value: bool, info: ValidationInfo) -> bool: @@ -695,26 +673,6 @@ class Dataset(FidesModel, FidesopsMetaBackwardsCompat): _check_valid_country_code = country_code_validator _unique_items_in_list = field_validator("collections")(unique_items_in_list) - @model_validator(mode="before") - @classmethod - def deprecate_fields(cls, values: Dict) -> Dict: - """ - Warn of Dataset fields pending deprecation. - """ - # TODO: Do we want to remove these for Fideslang 3? - deprecated_fields = [ - "joint_controller", - "retention", - "third_country_transfers", - ] - for field in deprecated_fields: - if values.get(field) is not None: - warn( - f"The {field} field is deprecated, and will be removed in a future version of fideslang.", - DeprecationWarning, - ) - return values - # Evaluation class ViolationAttributes(BaseModel): diff --git a/tests/fideslang/gvl/test_gvl.py b/tests/fideslang/gvl/test_gvl.py index abda6fd2..118ba492 100644 --- a/tests/fideslang/gvl/test_gvl.py +++ b/tests/fideslang/gvl/test_gvl.py @@ -1,14 +1,9 @@ import pytest -from fideslang.gvl import ( - GVL_FEATURES, - GVL_SPECIAL_FEATURES, - Feature, - data_category_id_to_data_categories, - feature_id_to_feature_name, - feature_name_to_feature, - purpose_to_data_use, -) +from fideslang.gvl import (GVL_FEATURES, GVL_SPECIAL_FEATURES, Feature, + data_category_id_to_data_categories, + feature_id_to_feature_name, feature_name_to_feature, + purpose_to_data_use) def test_purpose_to_data_use(): diff --git a/tests/fideslang/test_models.py b/tests/fideslang/test_models.py index a05bdad2..1040ed05 100644 --- a/tests/fideslang/test_models.py +++ b/tests/fideslang/test_models.py @@ -1,13 +1,9 @@ -from pytest import deprecated_call, mark, raises +from pytest import mark, raises -from fideslang import DataFlow, Dataset, Organization, PrivacyDeclaration, System -from fideslang.models import ( - ContactDetails, - DataResponsibilityTitle, - DatasetCollection, - DatasetField, - DataUse, -) +from fideslang.models import (ContactDetails, DataFlow, + DataResponsibilityTitle, Dataset, + DatasetCollection, DatasetField, DataUse, + Organization, PrivacyDeclaration, System) pytestmark = mark.unit @@ -430,44 +426,6 @@ def test_flexible_legal_basis_default(self): ) assert pd.flexible_legal_basis_for_processing - @mark.parametrize( - "deprecated_field,value", - [ - ("data_responsibility_title", "Controller"), - ( - "joint_controller", - { - "name": "Jane Doe", - "address": "104 Test Lane; Test Town, TX, 32522", - "email": "jane@example.com", - "phone": "345-255-2555", - }, - ), - ("third_country_transfers", ["GBR"]), - ( - "data_protection_impact_assessment", - { - "is_required": True, - "progress": "pending", - "link": "https://www.example.com/dpia", - }, - ), - ], - ) - def test_system_deprecated_fields(self, deprecated_field, value) -> None: - with deprecated_call(match=deprecated_field): - assert System( - **{ - "description": "Test System", - "fides_key": "test_system", - "name": "Test System", - "registry": "1", - "system_type": "SYSTEM", - "privacy_declarations": [], - deprecated_field: value, - } - ) - class TestDataset: def test_valid_dataset(self): @@ -529,24 +487,6 @@ def test_valid_dataset(self): ], ) - @mark.parametrize( - "deprecated_field,value", - [ - ("joint_controller", {"name": "Controller_name"}), - ("retention", "90 days"), - ("third_country_transfers", ["IRL"]), - ], - ) - def test_dataset_deprecated_fields(self, deprecated_field, value) -> None: - with deprecated_call(match=deprecated_field): - assert Dataset( - **{ - "fides_key": "test_dataset", - "collections": [], - deprecated_field: value, - } - ) - def test_dataset_collection_skip_processing(self): collection = DatasetCollection( name="dataset_collection_1", @@ -577,17 +517,3 @@ def test_dataset_collection_skip_processing(self): class TestDataUse: def test_minimal_data_use(self): assert DataUse(fides_key="new_use") - - @mark.parametrize( - "deprecated_field,value", - [ - ("legal_basis", "Legal Obligation"), - ("special_category", "Substantial Public Interest"), - ("recipients", ["Advertising Bureau"]), - ("legitimate_interest", False), - ("legitimate_interest_impact_assessment", "https://www.example.com"), - ], - ) - def test_datause_deprecated_fields(self, deprecated_field, value) -> None: - with deprecated_call(match=deprecated_field): - assert DataUse(**{"fides_key": "new_use", deprecated_field: value}) diff --git a/tests/fideslang/test_parse.py b/tests/fideslang/test_parse.py index d8e75171..5c90cd74 100644 --- a/tests/fideslang/test_parse.py +++ b/tests/fideslang/test_parse.py @@ -1,7 +1,6 @@ import pytest -from fideslang import models -from fideslang import parse +from fideslang import models, parse @pytest.mark.unit diff --git a/tests/fideslang/test_relationships.py b/tests/fideslang/test_relationships.py index ca546382..4a409147 100644 --- a/tests/fideslang/test_relationships.py +++ b/tests/fideslang/test_relationships.py @@ -1,23 +1,11 @@ import pytest from fideslang import relationships -from fideslang.models import ( - ContactDetails, - DataCategory, - DataFlow, - DataProtectionImpactAssessment, - Dataset, - DatasetCollection, - DatasetField, - DataUse, - MatchesEnum, - Organization, - Policy, - PolicyRule, - PrivacyDeclaration, - System, - Taxonomy, -) +from fideslang.models import (ContactDetails, DataCategory, DataFlow, + DataProtectionImpactAssessment, Dataset, + DatasetCollection, DatasetField, DataUse, + MatchesEnum, Organization, Policy, PolicyRule, + PrivacyDeclaration, System, Taxonomy) @pytest.mark.unit diff --git a/tests/fideslang/test_validation.py b/tests/fideslang/test_validation.py index 5f310109..cfe3459f 100644 --- a/tests/fideslang/test_validation.py +++ b/tests/fideslang/test_validation.py @@ -1,31 +1,13 @@ import pytest from pydantic import ValidationError -from fideslang.models import ( - CollectionMeta, - DataCategory, - DataFlow, - Dataset, - DataUse, - DataSubject, - DatasetCollection, - DatasetField, - DatasetMetadata, - DataUse, - FidesDatasetReference, - FidesMeta, - FidesModel, - Policy, - PolicyRule, - PrivacyDeclaration, - PrivacyRule, - System, -) -from fideslang.validation import ( - FidesValidationError, - validate_fides_key, - valid_data_type, -) +from fideslang.models import (CollectionMeta, DataCategory, DataFlow, Dataset, + DatasetCollection, DatasetField, DatasetMetadata, + DataSubject, DataUse, FidesDatasetReference, + FidesMeta, FidesModel, Policy, PolicyRule, + PrivacyDeclaration, PrivacyRule, System) +from fideslang.validation import (FidesValidationError, valid_data_type, + validate_fides_key) DEFAULT_TAXONOMY_CLASSES = [DataCategory, DataUse, DataSubject]