diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index d6719934..54191aeb 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -93,9 +93,9 @@ jobs: Pytest-Matrix: strategy: matrix: - python_version: ["3.8", "3.9", "3.10", "3.11"] - pydantic_version: ["1.8.2", "1.9.2", "1.10.9"] - pyyaml_version: ["5.4.1", "6.0"] + python_version: ["3.9", "3.10", "3.11"] + pydantic_version: ["2.3.0", "2.4.2", "2.5.3", "2.6.4", "2.7.1"] + pyyaml_version: ["5.4.1", "6.0.1"] runs-on: ubuntu-latest continue-on-error: true steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index 46bc4594..274a5285 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,11 @@ The types of changes are: ## [Unreleased](https://github.com/ethyca/fideslang/compare/3.0.1...main) +### Changed + +- Upgrades Pydantic for V2 support and removes support for Pydantic V1 [#11](https://github.com/ethyca/fideslang/pull/11) +- Removes Python 3.8 from supported versions [#11](https://github.com/ethyca/fideslang/pull/11) +- ## [3.0.1](https://github.com/ethyca/fideslang/compare/3.0.0...3.0.1) ### Added diff --git a/Dockerfile b/Dockerfile index 87c40171..5b0aa15c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8-slim-bullseye as base +FROM python:3.9-slim-bullseye as base # Update pip in the base image since we'll use it everywhere RUN pip install -U pip diff --git a/dev-requirements.txt b/dev-requirements.txt index f91fc28a..ef60250a 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,12 +1,12 @@ black==23.3.0 -mypy==1.4.0 +mypy==1.10.0 nox>=2023 packaging>=22.0 -pre-commit==2.9.3 +pre-commit==3.7.1 pylint==2.10.0 pytest==7.3.1 pytest-cov==2.11.1 requests-mock==1.8.0 setuptools>=64.0.2 types-PyYAML -xenon==0.7.3 +xenon==0.9.1 diff --git a/mkdocs/Dockerfile b/mkdocs/Dockerfile index e0e4e86d..5162caa9 100644 --- a/mkdocs/Dockerfile +++ b/mkdocs/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8-slim-bullseye +FROM python:3.9-slim-bullseye # Install auxiliary software RUN apt-get update diff --git a/noxfile.py b/noxfile.py index 2b0dd342..2a4aae3f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -3,9 +3,10 @@ nox.options.sessions = [] nox.options.reuse_existing_virtualenvs = True -TESTED_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"] -TESTED_PYDANTIC_VERSIONS = ["1.8.2", "1.9.2", "1.10.9"] -TESTED_PYYAML_VERSIONS = ["5.4.1", "6.0"] +# These should match what is in the `pr_checks.yml` file for CI runs +TESTED_PYTHON_VERSIONS = ["3.9", "3.10", "3.11"] +TESTED_PYDANTIC_VERSIONS = ["2.3.0", "2.4.2", "2.5.3", "2.6.4", "2.7.1"] +TESTED_PYYAML_VERSIONS = ["5.4.1", "6.0.1"] def install_requirements(session: nox.Session) -> None: diff --git a/pyproject.toml b/pyproject.toml index 47a26082..c9befed7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,13 +9,12 @@ name = "fideslang" description = "Fides Taxonomy Language" dynamic = ["dependencies", "version"] readme = "README.md" -requires-python = ">=3.8, <4" +requires-python = ">=3.9, <4" authors = [{ name = "Ethyca, Inc.", email = "fidesteam@ethyca.com" }] license = { text = "Apache License 2.0" } classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/requirements.txt b/requirements.txt index cc280885..50bd6bb5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 
@@ -pydantic>=1.8.1,<1.11.0 +pydantic>=2.3.0,<=2.7.1 pyyaml>=5,<7 packaging>=20.0 diff --git a/src/fideslang/default_taxonomy/utils.py b/src/fideslang/default_taxonomy/utils.py index f97cc94f..39133a6e 100644 --- a/src/fideslang/default_taxonomy/utils.py +++ b/src/fideslang/default_taxonomy/utils.py @@ -18,5 +18,5 @@ def default_factory(taxonomy_class: CustomType, **kwargs: Dict) -> CustomType: # This is the version where we started tracking from, so # we use it as the default starting point. kwargs["version_added"] = "2.0.0" # type: ignore[assignment] - item = taxonomy_class.parse_obj(kwargs) + item = taxonomy_class.model_validate(kwargs) return item diff --git a/src/fideslang/gvl/__init__.py b/src/fideslang/gvl/__init__.py index 2a298eab..ec1c7d0a 100644 --- a/src/fideslang/gvl/__init__.py +++ b/src/fideslang/gvl/__init__.py @@ -50,16 +50,16 @@ def _load_data() -> None: ) as mapping_file: data = load(mapping_file) for raw_purpose in data["purposes"].values(): - purpose = Purpose.parse_obj(raw_purpose) - mapped_purpose = MappedPurpose.parse_obj(raw_purpose) + purpose = Purpose.model_validate(raw_purpose) + mapped_purpose = MappedPurpose.model_validate(raw_purpose) GVL_PURPOSES[purpose.id] = purpose MAPPED_PURPOSES[mapped_purpose.id] = mapped_purpose for data_use in mapped_purpose.data_uses: MAPPED_PURPOSES_BY_DATA_USE[data_use] = mapped_purpose for raw_special_purpose in data["specialPurposes"].values(): - special_purpose = Purpose.parse_obj(raw_special_purpose) - mapped_special_purpose = MappedPurpose.parse_obj(raw_special_purpose) + special_purpose = Purpose.model_validate(raw_special_purpose) + mapped_special_purpose = MappedPurpose.model_validate(raw_special_purpose) GVL_SPECIAL_PURPOSES[special_purpose.id] = special_purpose MAPPED_SPECIAL_PURPOSES[mapped_special_purpose.id] = mapped_special_purpose for data_use in mapped_special_purpose.data_uses: @@ -71,12 +71,12 @@ def _load_data() -> None: feature_data = load(feature_mapping_file) for raw_feature in feature_data["features"].values(): - feature = Feature.parse_obj(raw_feature) + feature = Feature.model_validate(raw_feature) GVL_FEATURES[feature.id] = feature FEATURES_BY_NAME[feature.name] = feature for raw_special_feature in feature_data["specialFeatures"].values(): - special_feature = Feature.parse_obj(raw_special_feature) + special_feature = Feature.model_validate(raw_special_feature) GVL_SPECIAL_FEATURES[special_feature.id] = special_feature FEATURES_BY_NAME[special_feature.name] = special_feature @@ -86,8 +86,8 @@ def _load_data() -> None: data_category_data = load(data_category_mapping_file) for raw_data_category in data_category_data.values(): - data_category = GVLDataCategory.parse_obj(raw_data_category) - mapped_data_category = MappedDataCategory.parse_obj(raw_data_category) + data_category = GVLDataCategory.model_validate(raw_data_category) + mapped_data_category = MappedDataCategory.model_validate(raw_data_category) GVL_DATA_CATEGORIES[data_category.id] = data_category MAPPED_GVL_DATA_CATEGORIES[mapped_data_category.id] = mapped_data_category diff --git a/src/fideslang/gvl/models.py b/src/fideslang/gvl/models.py index 9f0155cf..1f599e4c 100644 --- a/src/fideslang/gvl/models.py +++ b/src/fideslang/gvl/models.py @@ -32,7 +32,8 @@ class MappedPurpose(Purpose): class Feature(BaseModel): - "Pydantic model for GVL feature records" + """Pydantic model for GVL feature records""" + id: int = Field(description="Official GVL feature ID or special feature ID") name: str = Field(description="Name of the GVL feature or special 
feature.") description: str = Field( diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 6d380286..92626a88 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -7,22 +7,26 @@ from datetime import datetime from enum import Enum -from typing import Any, Dict, List, Optional, Union +from typing import Annotated, Dict, List, Optional, Union +from packaging.version import InvalidVersion, Version from pydantic import ( - AnyUrl, + AfterValidator, BaseModel, - ConstrainedStr, + ConfigDict, Field, HttpUrl, PositiveInt, - root_validator, - validator, + SerializeAsAny, + ValidationInfo, + field_validator, + model_validator, ) from fideslang.validation import ( + AnyUrlString, FidesKey, - FidesVersion, + FidesValidationError, deprecated_version_later_than_added, has_versioning_if_default, is_deprecated_if_replaced, @@ -32,28 +36,25 @@ sort_list_objects_by_name, unique_items_in_list, valid_data_type, + validate_fides_key, ) -matching_parent_key_validator = validator("parent_key", allow_reuse=True, always=True)( - matching_parent_key +matching_parent_key_validator = field_validator("parent_key")(matching_parent_key) +no_self_reference_validator = field_validator("parent_key")(no_self_reference) +has_versioning_if_default_validator = field_validator("is_default")( + has_versioning_if_default ) -no_self_reference_validator = validator("parent_key", allow_reuse=True)( - no_self_reference -) -has_versioning_if_default_validator = validator( - "is_default", allow_reuse=True, always=True -)(has_versioning_if_default) -deprecated_version_later_than_added_validator = validator( - "version_deprecated", allow_reuse=True +deprecated_version_later_than_added_validator = field_validator( + "version_deprecated", )(deprecated_version_later_than_added) -is_deprecated_if_replaced_validator = validator("replaced_by", allow_reuse=True)( +is_deprecated_if_replaced_validator = field_validator("replaced_by")( is_deprecated_if_replaced ) # Reusable Fields name_field = Field(description="Human-Readable name for this resource.") description_field = Field( - description="A detailed description of what this resource is." + default=None, description="A detailed description of what this resource is." ) meta_field = Field( default=None, @@ -72,13 +73,13 @@ class FidesModel(BaseModel): description="Defines the Organization that this resource belongs to.", ) tags: Optional[List[str]] = None - name: Optional[str] = name_field + name: Optional[str] = Field( + default=None, description="Human-Readable name for this resource." 
+ ) description: Optional[str] = description_field - - class Config: - "Config for the FidesModel" - extra = "ignore" - orm_mode = True + model_config = ConfigDict( + extra="ignore", from_attributes=True, coerce_numbers_to_str=True + ) class DefaultModel(BaseModel): @@ -100,6 +101,7 @@ class DefaultModel(BaseModel): ) is_default: bool = Field( default=False, + validate_default=True, description="Denotes whether the resource is part of the default taxonomy or not.", ) @@ -109,32 +111,42 @@ class DefaultModel(BaseModel): ) _is_deprecated_if_replaced: classmethod = is_deprecated_if_replaced_validator - @validator("version_added") + @field_validator("version_added") @classmethod - def validate_verion_added( - cls, version_added: Optional[str], values: Dict - ) -> Optional[str]: + def validate_version_added(cls, version_added: Optional[str]) -> Optional[str]: """ - Validate that the `version_added` field is a proper FidesVersion + Validate that the `version_added` field is a proper Version """ if not version_added: return None - FidesVersion.validate(version_added) + try: + Version(version_added) + except InvalidVersion: + raise FidesValidationError( + f"Field 'version_added' does not have a valid version: {version_added}" + ) + return version_added - @validator("version_deprecated") + @field_validator("version_deprecated") @classmethod def validate_version_deprecated( - cls, version_deprecated: Optional[str], values: Dict + cls, version_deprecated: Optional[str] ) -> Optional[str]: """ - Validate that the `version_deprecated` is a proper FidesVersion + Validate that the `version_deprecated` is a proper Version """ if not version_deprecated: return None - FidesVersion.validate(version_deprecated) + try: + Version(version_deprecated) + except InvalidVersion: + raise FidesValidationError( + f"Field 'version_deprecated' does not have a valid version: {version_deprecated}" + ) + return version_deprecated @@ -246,23 +258,19 @@ class SpecialCategoryLegalBasisEnum(str, Enum): class DataCategory(FidesModel, DefaultModel): """The DataCategory resource model.""" - parent_key: Optional[FidesKey] + parent_key: Optional[FidesKey] = Field(default=None, validate_default=True) - _matching_parent_key: classmethod = matching_parent_key_validator _no_self_reference: classmethod = no_self_reference_validator + _matching_parent_key: classmethod = matching_parent_key_validator class Cookies(BaseModel): """The Cookies resource model""" name: str - path: Optional[str] - domain: Optional[str] - - class Config: - """Config for the cookies""" - - orm_mode = True + path: Optional[str] = None + domain: Optional[str] = None + model_config = ConfigDict(from_attributes=True) class DataSubjectRights(BaseModel): @@ -278,29 +286,32 @@ class DataSubjectRights(BaseModel): description="Defines the strategy used when mapping data rights to a data subject.", ) values: Optional[List[DataSubjectRightsEnum]] = Field( + default=None, description="A list of valid data subject rights to be used when applying data rights to a data subject via a strategy.", ) - @root_validator() - @classmethod - def include_exclude_has_values(cls, values: Dict) -> Dict: + @model_validator(mode="after") + def include_exclude_has_values(self) -> "DataSubjectRights": """ Validate the if include or exclude is chosen, that at least one value is present. 
""" - strategy, rights = values.get("strategy"), values.get("values") + strategy, rights = self.strategy, self.values if strategy in ("INCLUDE", "EXCLUDE"): assert ( rights is not None ), f"If {strategy} is chosen, rights must also be listed." - return values + return self class DataSubject(FidesModel, DefaultModel): """The DataSubject resource model.""" - rights: Optional[DataSubjectRights] = Field(description=DataSubjectRights.__doc__) + rights: Optional[DataSubjectRights] = Field( + default=None, description=DataSubjectRights.__doc__ + ) automated_decisions_or_profiling: Optional[bool] = Field( + default=None, description="A boolean value to annotate whether or not automated decisions/profiling exists for the data subject.", ) @@ -308,9 +319,10 @@ class DataSubject(FidesModel, DefaultModel): class DataUse(FidesModel, DefaultModel): """The DataUse resource model.""" - parent_key: Optional[FidesKey] = None - _matching_parent_key: classmethod = matching_parent_key_validator + parent_key: Optional[FidesKey] = Field(default=None, validate_default=True) + _no_self_reference: classmethod = no_self_reference_validator + _matching_parent_key: classmethod = matching_parent_key_validator # Dataset @@ -339,6 +351,7 @@ class MyDatasetField(DatasetFieldBase): name: str = name_field description: Optional[str] = description_field data_categories: Optional[List[FidesKey]] = Field( + default=None, description="Arrays of Data Categories, identified by `fides_key`, that applies to this field.", ) @@ -355,7 +368,7 @@ class FidesDatasetReference(BaseModel): dataset: FidesKey field: str - direction: Optional[EdgeDirection] + direction: Optional[EdgeDirection] = None class FidesMeta(BaseModel): @@ -366,25 +379,31 @@ class FidesMeta(BaseModel): default=None, ) identity: Optional[str] = Field( - description="The type of the identity data that should be used to query this collection for a DSR." + default=None, + description="The type of the identity data that should be used to query this collection for a DSR.", ) primary_key: Optional[bool] = Field( - description="Whether the current field can be considered a primary key of the current collection" + default=None, + description="Whether the current field can be considered a primary key of the current collection", ) data_type: Optional[str] = Field( - description="Optionally specify the data type. Fides will attempt to cast values to this type when querying." + default=None, + description="Optionally specify the data type. Fides will attempt to cast values to this type when querying.", ) length: Optional[PositiveInt] = Field( - description="Optionally specify the allowable field length. Fides will not generate values that exceed this size." + default=None, + description="Optionally specify the allowable field length. Fides will not generate values that exceed this size.", ) return_all_elements: Optional[bool] = Field( - description="Optionally specify to query for the entire array if the array is an entrypoint into the node. Default is False." + default=None, + description="Optionally specify to query for the entire array if the array is an entrypoint into the node. Default is False.", ) read_only: Optional[bool] = Field( - description="Optionally specify if a field is read-only, meaning it can't be updated or deleted." 
+ default=None, + description="Optionally specify if a field is read-only, meaning it can't be updated or deleted.", ) - @validator("data_type") + @field_validator("data_type") @classmethod def valid_data_type(cls, value: Optional[str]) -> Optional[str]: """Validate that all annotated data types exist in the taxonomy""" @@ -416,10 +435,11 @@ class DatasetField(DatasetFieldBase, FidesopsMetaBackwardsCompat): fides_meta: Optional[FidesMeta] = None fields: Optional[List[DatasetField]] = Field( + default=None, description="An optional array of objects that describe hierarchical/nested fields (typically found in NoSQL databases).", ) - @validator("fides_meta") + @field_validator("fides_meta") @classmethod def valid_meta(cls, meta_values: Optional[FidesMeta]) -> Optional[FidesMeta]: """Validate upfront that the return_all_elements flag can only be specified on array fields""" @@ -435,22 +455,21 @@ def valid_meta(cls, meta_values: Optional[FidesMeta]) -> Optional[FidesMeta]: ) return meta_values - @validator("fields") - @classmethod - def validate_object_fields( # type: ignore - cls, - fields: Optional[List["DatasetField"]], - values: Dict[str, Any], - ) -> Optional[List["DatasetField"]]: + @model_validator(mode="after") + def validate_object_fields( + self, + _: ValidationInfo, + ) -> DatasetField: """Two validation checks for object fields: - If there are sub-fields specified, type should be either empty or 'object' - Additionally object fields cannot have data_categories. """ + fields = self.fields declared_data_type = None - field_name: str = values.get("name") # type: ignore + field_name: str = self.name - if values.get("fides_meta"): - declared_data_type = values["fides_meta"].data_type + if self.fides_meta: + declared_data_type = self.fides_meta.data_type if fields and declared_data_type: data_type, _ = parse_data_type_string(declared_data_type) @@ -459,43 +478,39 @@ def validate_object_fields( # type: ignore f"The data type '{data_type}' on field '{field_name}' is not compatible with specified sub-fields. Convert to an 'object' field." ) - if (fields or declared_data_type == "object") and values.get("data_categories"): + if (fields or declared_data_type == "object") and self.data_categories: raise ValueError( f"Object field '{field_name}' cannot have specified data_categories. Specify category on sub-field instead" ) - - return fields + return self # this is required for the recursive reference in the pydantic model: -DatasetField.update_forward_refs() +DatasetField.model_rebuild() -class FidesCollectionKey(ConstrainedStr): +def validate_fides_collection_key(value: str) -> str: """ - Dataset.Collection name where both dataset and collection names are valid FidesKeys + Overrides validation to check FidesCollectionKey format, and that both the dataset + and collection names have the FidesKey format. """ + values = value.split(".") + if len(values) == 2: + validate_fides_key(values[0]) + validate_fides_key(values[1]) + return value + raise ValueError( + "FidesCollection must be specified in the form 'FidesKey.FidesKey'" + ) - @classmethod - def validate(cls, value: str) -> str: - """ - Overrides validation to check FidesCollectionKey format, and that both the dataset - and collection names have the FidesKey format. 
- """ - values = value.split(".") - if len(values) == 2: - FidesKey.validate(values[0]) - FidesKey.validate(values[1]) - return value - raise ValueError( - "FidesCollection must be specified in the form 'FidesKey.FidesKey'" - ) + +FidesCollectionKey = Annotated[str, AfterValidator(validate_fides_collection_key)] class CollectionMeta(BaseModel): """Collection-level specific annotations used for query traversal""" - after: Optional[List[FidesCollectionKey]] + after: Optional[List[FidesCollectionKey]] = None skip_processing: Optional[bool] = False @@ -509,6 +524,7 @@ class DatasetCollection(FidesopsMetaBackwardsCompat): name: str = name_field description: Optional[str] = description_field data_categories: Optional[List[FidesKey]] = Field( + default=None, description="Array of Data Category resources identified by `fides_key`, that apply to all fields in the collection.", ) fields: List[DatasetField] = Field( @@ -517,12 +533,8 @@ class DatasetCollection(FidesopsMetaBackwardsCompat): fides_meta: Optional[CollectionMeta] = None - _sort_fields: classmethod = validator("fields", allow_reuse=True)( - sort_list_objects_by_name - ) - _unique_items_in_list: classmethod = validator("fields", allow_reuse=True)( - unique_items_in_list - ) + _sort_fields: classmethod = field_validator("fields")(sort_list_objects_by_name) # type: ignore[assignment] + _unique_items_in_list: classmethod = field_validator("fields")(unique_items_in_list) # type: ignore[assignment] class ContactDetails(BaseModel): @@ -561,8 +573,8 @@ class DatasetMetadata(BaseModel): Object used to hold application specific metadata for a dataset """ - resource_id: Optional[str] - after: Optional[List[FidesKey]] + resource_id: Optional[str] = None + after: Optional[List[FidesKey]] = None class Dataset(FidesModel, FidesopsMetaBackwardsCompat): @@ -570,6 +582,7 @@ class Dataset(FidesModel, FidesopsMetaBackwardsCompat): meta: Optional[Dict] = meta_field data_categories: Optional[List[FidesKey]] = Field( + default=None, description="Array of Data Category resources identified by `fides_key`, that apply to all collections in the Dataset.", ) fides_meta: Optional[DatasetMetadata] = Field( @@ -579,10 +592,10 @@ class Dataset(FidesModel, FidesopsMetaBackwardsCompat): description="An array of objects that describe the Dataset's collections.", ) - _sort_collections: classmethod = validator("collections", allow_reuse=True)( + _sort_collections: classmethod = field_validator("collections")( # type: ignore[assignment] sort_list_objects_by_name ) - _unique_items_in_list: classmethod = validator("collections", allow_reuse=True)( + _unique_items_in_list: classmethod = field_validator("collections")( # type: ignore[assignment] unique_items_in_list ) @@ -639,11 +652,7 @@ class Evaluation(BaseModel): default="", description="A human-readable string response for the evaluation.", ) - - class Config: - "Config for the Evaluation" - extra = "ignore" - orm_mode = True + model_config = ConfigDict(extra="ignore", from_attributes=True) # Organization @@ -668,7 +677,8 @@ class OrganizationMetadata(BaseModel): """ resource_filters: Optional[List[ResourceFilter]] = Field( - description="A list of filters that can be used when generating or scanning systems." 
+ default=None, + description="A list of filters that can be used when generating or scanning systems.", ) @@ -685,19 +695,23 @@ class Organization(FidesModel): description="An inherited field from the FidesModel that is unused with an Organization.", ) controller: Optional[ContactDetails] = Field( + default=None, description=ContactDetails.__doc__, ) data_protection_officer: Optional[ContactDetails] = Field( + default=None, description=ContactDetails.__doc__, ) fidesctl_meta: Optional[OrganizationMetadata] = Field( + default=None, description=OrganizationMetadata.__doc__, ) representative: Optional[ContactDetails] = Field( + default=None, description=ContactDetails.__doc__, ) security_policy: Optional[HttpUrl] = Field( - description="Am optional URL to the organization security policy." + default=None, description="Am optional URL to the organization security policy." ) @@ -760,9 +774,7 @@ class Policy(FidesModel): description=PolicyRule.__doc__, ) - _sort_rules: classmethod = validator("rules", allow_reuse=True)( - sort_list_objects_by_name - ) + _sort_rules: classmethod = field_validator("rules")(sort_list_objects_by_name) # type: ignore[assignment] class PrivacyDeclaration(BaseModel): @@ -774,6 +786,7 @@ class PrivacyDeclaration(BaseModel): """ name: Optional[str] = Field( + default=None, description="The name of the privacy declaration on the system.", ) data_categories: List[FidesKey] = Field( @@ -787,13 +800,16 @@ class PrivacyDeclaration(BaseModel): description="An array of data subjects describing a system in a privacy declaration.", ) dataset_references: Optional[List[FidesKey]] = Field( + default=None, description="Referenced Dataset fides keys used by the system.", ) egress: Optional[List[FidesKey]] = Field( - description="The resources to which data is sent. Any `fides_key`s included in this list reference `DataFlow` entries in the `egress` array of any `System` resources to which this `PrivacyDeclaration` is applied." + default=None, + description="The resources to which data is sent. Any `fides_key`s included in this list reference `DataFlow` entries in the `egress` array of any `System` resources to which this `PrivacyDeclaration` is applied.", ) ingress: Optional[List[FidesKey]] = Field( - description="The resources from which data is received. Any `fides_key`s included in this list reference `DataFlow` entries in the `ingress` array of any `System` resources to which this `PrivacyDeclaration` is applied." + default=None, + description="The resources from which data is received. Any `fides_key`s included in this list reference `DataFlow` entries in the `ingress` array of any `System` resources to which this `PrivacyDeclaration` is applied.", ) features: List[str] = Field( default_factory=list, description="The features of processing personal data." @@ -803,19 +819,23 @@ class PrivacyDeclaration(BaseModel): default=True, ) legal_basis_for_processing: Optional[LegalBasisForProcessingEnum] = Field( - description="The legal basis under which personal data is processed for this purpose." + default=None, + description="The legal basis under which personal data is processed for this purpose.", ) impact_assessment_location: Optional[str] = Field( - description="Where the legitimate interest impact assessment is stored" + default=None, + description="Where the legitimate interest impact assessment is stored", ) retention_period: Optional[str] = Field( - description="An optional string to describe the time period for which data is retained for this purpose." 
+ default=None, + description="An optional string to describe the time period for which data is retained for this purpose.", ) processes_special_category_data: bool = Field( default=False, description="This system processes special category data", ) special_category_legal_basis: Optional[SpecialCategoryLegalBasisEnum] = Field( + default=None, description="The legal basis under which the special category data is processed.", ) data_shared_with_third_parties: bool = Field( @@ -823,6 +843,7 @@ class PrivacyDeclaration(BaseModel): description="This system shares data with third parties for this purpose.", ) third_parties: Optional[str] = Field( + default=None, description="The types of third parties the data is shared with.", ) shared_categories: List[str] = Field( @@ -830,13 +851,10 @@ class PrivacyDeclaration(BaseModel): description="The categories of personal data that this system shares with third parties.", ) cookies: Optional[List[Cookies]] = Field( + default=None, description="Cookies associated with this data use to deliver services and functionality", ) - - class Config: - """Config for the Privacy Declaration""" - - orm_mode = True + model_config = ConfigDict(from_attributes=True) class SystemMetadata(BaseModel): @@ -847,15 +865,22 @@ class SystemMetadata(BaseModel): """ resource_id: Optional[str] = Field( - description="The external resource id for the system being modeled." + default=None, + description="The external resource id for the system being modeled.", ) endpoint_address: Optional[str] = Field( - description="The host of the external resource for the system being modeled." + default=None, + description="The host of the external resource for the system being modeled.", ) endpoint_port: Optional[str] = Field( - description="The port of the external resource for the system being modeled." + default=None, + description="The port of the external resource for the system being modeled.", ) + model_config = ConfigDict( + coerce_numbers_to_str=True + ) # For backwards compat of endpoint_port + class FlowableResources(str, Enum): """ @@ -883,25 +908,25 @@ class DataFlow(BaseModel): description=f"Specifies the resource model class for which the `fides_key` applies. May be any of {', '.join([member.value for member in FlowableResources])}.", ) data_categories: Optional[List[FidesKey]] = Field( + default=None, description="An array of data categories describing the data in transit.", ) - @root_validator(skip_on_failure=True) - @classmethod - def user_special_case(cls, values: Dict) -> Dict: + @model_validator(mode="after") + def user_special_case(self) -> "DataFlow": """ If either the `fides_key` or the `type` are set to "user", then the other must also be set to "user". 
""" - if values["fides_key"] == "user" or values["type"] == "user": + if self.fides_key == "user" or self.type == "user": assert ( - values["fides_key"] == "user" and values["type"] == "user" + self.fides_key == "user" and self.type == "user" ), "The 'user' fides_key is required for, and requires, the type 'user'" - return values + return self - @validator("type") + @field_validator("type") @classmethod def verify_type_is_flowable(cls, value: str) -> str: """ @@ -923,16 +948,17 @@ class System(FidesModel): meta: Optional[Dict] = meta_field fidesctl_meta: Optional[SystemMetadata] = Field( + default=None, description=SystemMetadata.__doc__, ) system_type: str = Field( description="A required value to describe the type of system being modeled, examples include: Service, Application, Third Party, etc.", ) egress: Optional[List[DataFlow]] = Field( - description="The resources to which the system sends data." + default=None, description="The resources to which the system sends data." ) ingress: Optional[List[DataFlow]] = Field( - description="The resources from which the system receives data." + default=None, description="The resources from which the system receives data." ) privacy_declarations: List[PrivacyDeclaration] = Field( description=PrivacyDeclaration.__doc__, @@ -942,13 +968,16 @@ class System(FidesModel): description="An optional value to identify the owning department or group of the system within your organization", ) vendor_id: Optional[str] = Field( - description="The unique identifier for the vendor that's associated with this system." + default=None, + description="The unique identifier for the vendor that's associated with this system.", ) previous_vendor_id: Optional[str] = Field( - description="If specified, the unique identifier for the vendor that was previously associated with this system." + default=None, + description="If specified, the unique identifier for the vendor that was previously associated with this system.", ) vendor_deleted_date: Optional[datetime] = Field( - description="The deleted date of the vendor that's associated with this system." + default=None, + description="The deleted date of the vendor that's associated with this system.", ) dataset_references: List[FidesKey] = Field( default_factory=list, @@ -963,7 +992,8 @@ class System(FidesModel): description="This toggle indicates whether the system is exempt from privacy regulation if they do process personal data.", ) reason_for_exemption: Optional[str] = Field( - description="The reason that the system is exempt from privacy regulation." + default=None, + description="The reason that the system is exempt from privacy regulation.", ) uses_profiling: bool = Field( default=False, @@ -986,35 +1016,41 @@ class System(FidesModel): description="Whether this system requires data protection impact assessments.", ) dpa_location: Optional[str] = Field( - description="Location where the DPAs or DIPAs can be found." + default=None, description="Location where the DPAs or DIPAs can be found." ) dpa_progress: Optional[str] = Field( - description="The optional status of a Data Protection Impact Assessment" + default=None, + description="The optional status of a Data Protection Impact Assessment", ) - privacy_policy: Optional[AnyUrl] = Field( - description="A URL that points to the system's publicly accessible privacy policy." 
+ privacy_policy: SerializeAsAny[Optional[AnyUrlString]] = Field( + default=None, + description="A URL that points to the system's publicly accessible privacy policy.", ) legal_name: Optional[str] = Field( - description="The legal name for the business represented by the system." + default=None, + description="The legal name for the business represented by the system.", ) legal_address: Optional[str] = Field( - description="The legal address for the business represented by the system." + default=None, + description="The legal address for the business represented by the system.", ) responsibility: List[DataResponsibilityTitle] = Field( default_factory=list, description=DataResponsibilityTitle.__doc__, ) dpo: Optional[str] = Field( - description="The official privacy contact address or DPO." + default=None, description="The official privacy contact address or DPO." ) joint_controller_info: Optional[str] = Field( - description="The party or parties that share the responsibility for processing personal data." + default=None, + description="The party or parties that share the responsibility for processing personal data.", ) data_security_practices: Optional[str] = Field( - description="The data security practices employed by this system." + default=None, description="The data security practices employed by this system." ) cookie_max_age_seconds: Optional[int] = Field( - description="The maximum storage duration, in seconds, for cookies used by this system." + default=None, + description="The maximum storage duration, in seconds, for cookies used by this system.", ) uses_cookies: bool = Field( default=False, description="Whether this system uses cookie storage." @@ -1027,49 +1063,49 @@ class System(FidesModel): default=False, description="Whether the system uses non-cookie methods of storage or accessing information stored on a user's device.", ) - legitimate_interest_disclosure_url: Optional[AnyUrl] = Field( - description="A URL that points to the system's publicly accessible legitimate interest disclosure." + legitimate_interest_disclosure_url: SerializeAsAny[Optional[AnyUrlString]] = Field( + default=None, + description="A URL that points to the system's publicly accessible legitimate interest disclosure.", ) cookies: Optional[List[Cookies]] = Field( + default=None, description="System-level cookies unassociated with a data use to deliver services and functionality", ) - _sort_privacy_declarations: classmethod = validator( - "privacy_declarations", allow_reuse=True - )(sort_list_objects_by_name) + _sort_privacy_declarations: classmethod = field_validator("privacy_declarations")( # type: ignore[assignment] + sort_list_objects_by_name + ) - @validator("privacy_declarations", each_item=True) - @classmethod + @model_validator(mode="after") def privacy_declarations_reference_data_flows( - cls, - value: PrivacyDeclaration, - values: Dict, - ) -> PrivacyDeclaration: + self, + ) -> "System": """ Any `PrivacyDeclaration`s which include `egress` and/or `ingress` fields must only reference the `fides_key`s of defined `DataFlow`s in said field(s). """ - - for direction in ["egress", "ingress"]: - fides_keys = getattr(value, direction, None) - if fides_keys is not None: - data_flows = values[direction] - system = values["fides_key"] - assert ( - data_flows is not None and len(data_flows) > 0 - ), f"PrivacyDeclaration '{value.name}' defines {direction} with one or more resources and is applied to the System '{system}', which does not itself define any {direction}." 
- - for fides_key in fides_keys: - assert fides_key in [ - data_flow.fides_key for data_flow in data_flows - ], f"PrivacyDeclaration '{value.name}' defines {direction} with '{fides_key}' and is applied to the System '{system}', which does not itself define {direction} with that resource." - - return value - - class Config: - """Class for the System config""" - - use_enum_values = True + privacy_declarations: List[PrivacyDeclaration] = self.privacy_declarations or [] + for ( + privacy_declaration + ) in privacy_declarations: # pylint:disable=not-an-iterable + for direction in ["egress", "ingress"]: + fides_keys = getattr(privacy_declaration, direction, None) + if fides_keys is not None: + data_flows = getattr(self, direction) + system = self.fides_key + assert ( + data_flows is not None and len(data_flows) > 0 + ), f"PrivacyDeclaration '{privacy_declaration.name}' defines {direction} with one or more resources and is applied to the System '{system}', which does not itself define any {direction}." + + for fides_key in fides_keys: + assert fides_key in [ + data_flow.fides_key + for data_flow in data_flows # pylint:disable=not-an-iterable + ], f"PrivacyDeclaration '{privacy_declaration.name}' defines {direction} with '{fides_key}' and is applied to the System '{system}', which does not itself define {direction} with that resource." + + return self + + model_config = ConfigDict(use_enum_values=True) # Taxonomy diff --git a/src/fideslang/parse.py b/src/fideslang/parse.py index 94462d94..9c1a25c5 100644 --- a/src/fideslang/parse.py +++ b/src/fideslang/parse.py @@ -19,7 +19,7 @@ def parse_dict( raise SystemExit(1) try: - parsed_manifest = model_map[resource_type].parse_obj(resource) + parsed_manifest = model_map[resource_type].model_validate(resource) except Exception as err: print( "Failed to parse {} from {}:\n{}".format( @@ -34,7 +34,7 @@ def load_manifests_into_taxonomy(raw_manifests: Dict[str, List[Dict]]) -> Taxono """ Parse the raw resource manifests into resource resources. 
""" - taxonomy = Taxonomy.parse_obj( + taxonomy = Taxonomy.model_validate( { resource_type: [ parse_dict(resource_type, resource) for resource in resource_list diff --git a/src/fideslang/relationships.py b/src/fideslang/relationships.py index b238a226..8840bbdd 100644 --- a/src/fideslang/relationships.py +++ b/src/fideslang/relationships.py @@ -75,7 +75,7 @@ def get_referenced_missing_keys(taxonomy: Taxonomy) -> Set[FidesKey]: """ referenced_keys: List[Set[FidesKey]] = [ find_referenced_fides_keys(resource) - for resource_type in taxonomy.__fields_set__ + for resource_type in taxonomy.model_fields_set for resource in getattr(taxonomy, resource_type) ] key_set: Set[FidesKey] = set( diff --git a/src/fideslang/utils.py b/src/fideslang/utils.py index 5b64dbcb..e2c490bc 100644 --- a/src/fideslang/utils.py +++ b/src/fideslang/utils.py @@ -16,7 +16,7 @@ def get_resource_by_fides_key( return { resource_type: resource - for resource_type in taxonomy.__fields_set__ + for resource_type in taxonomy.model_fields_set for resource in getattr(taxonomy, resource_type) if resource.fides_key == fides_key } or None diff --git a/src/fideslang/validation.py b/src/fideslang/validation.py index 6a0236c3..9c1639c0 100644 --- a/src/fideslang/validation.py +++ b/src/fideslang/validation.py @@ -3,46 +3,30 @@ """ import re from collections import Counter -from typing import Dict, Generator, List, Optional, Pattern, Set, Tuple +from typing import Annotated, Dict, List, Optional, Pattern, Set, Tuple from packaging.version import Version -from pydantic import ConstrainedStr +from pydantic import AfterValidator, AnyHttpUrl, AnyUrl, ValidationInfo + +FIDES_KEY_PATTERN = r"^[a-zA-Z0-9_.<>-]+$" class FidesValidationError(ValueError): """Custom exception for when the pydantic ValidationError can't be used.""" -class FidesVersion(Version): - """Validate strings as proper semantic versions.""" - - @classmethod - def __get_validators__(cls) -> Generator: - yield cls.validate - - @classmethod - def validate(cls, value: str) -> Version: - """Validates that the provided string is a valid Semantic Version.""" - return Version(value) - - -class FidesKey(ConstrainedStr): - """ - A FidesKey type that creates a custom constrained string. - """ +def validate_fides_key(value: str) -> str: + """Throws ValueError if val is not a valid FidesKey""" - regex: Pattern[str] = re.compile(r"^[a-zA-Z0-9_.<>-]+$") + regex: Pattern[str] = re.compile(FIDES_KEY_PATTERN) + if not regex.match(value): + raise FidesValidationError( + f"FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: {value}" + ) + return value - @classmethod # This overrides the default method to throw the custom FidesValidationError - def validate(cls, value: str) -> str: - """Throws ValueError if val is not a valid FidesKey""" - if not cls.regex.match(value): - raise FidesValidationError( - f"FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: {value}" - ) - - return value +FidesKey = Annotated[str, AfterValidator(validate_fides_key)] def sort_list_objects_by_name(values: List) -> List: @@ -73,22 +57,22 @@ def unique_items_in_list(values: List) -> List: return values -def no_self_reference(value: FidesKey, values: Dict) -> FidesKey: +def no_self_reference(value: FidesKey, values: ValidationInfo) -> FidesKey: """ Check to make sure that the fides_key doesn't match other fides_key references within an object. i.e. 
DataCategory.parent_key != DataCategory.fides_key """ - fides_key = FidesKey.validate(values.get("fides_key", "")) + fides_key = validate_fides_key(values.data.get("fides_key", "")) if value == fides_key: - raise FidesValidationError("FidesKey can not self-reference!") + raise FidesValidationError("FidesKey cannot self-reference!") return value def deprecated_version_later_than_added( - version_deprecated: Optional[FidesVersion], values: Dict -) -> Optional[FidesVersion]: + version_deprecated: Optional[str], values: ValidationInfo +) -> Optional[str]: """ Check to make sure that the deprecated version is later than the added version. @@ -99,19 +83,27 @@ def deprecated_version_later_than_added( if not version_deprecated: return None - if version_deprecated < values.get("version_added", Version("0")): + version_added: Optional[str] = values.data.get("version_added") + + # Convert into Versions + transformed_version_added: Version = ( + Version(version_added) if version_added else Version("0") + ) + transformed_version_deprecated: Version = Version(version_deprecated) + + if transformed_version_deprecated < transformed_version_added: raise FidesValidationError( "Deprecated version number can't be earlier than version added!" ) - if version_deprecated == values.get("version_added", Version("0")): + if transformed_version_deprecated == transformed_version_added: raise FidesValidationError( "Deprecated version number can't be the same as the version added!" ) return version_deprecated -def has_versioning_if_default(is_default: bool, values: Dict) -> bool: +def has_versioning_if_default(is_default: bool, values: ValidationInfo) -> bool: """ Check to make sure that version fields are set for default items. """ @@ -119,15 +111,15 @@ def has_versioning_if_default(is_default: bool, values: Dict) -> bool: # If it's a default item, it at least needs a starting version if is_default: try: - assert values.get("version_added") + assert values.data.get("version_added") except AssertionError: raise FidesValidationError("Default items must have version information!") # If it's not default, it shouldn't have version info else: try: - assert not values.get("version_added") - assert not values.get("version_deprecated") - assert not values.get("replaced_by") + assert not values.data.get("version_added") + assert not values.data.get("version_deprecated") + assert not values.data.get("replaced_by") except AssertionError: raise FidesValidationError( "Non-default items can't have version information!" @@ -136,23 +128,23 @@ def has_versioning_if_default(is_default: bool, values: Dict) -> bool: return is_default -def is_deprecated_if_replaced(replaced_by: str, values: Dict) -> str: +def is_deprecated_if_replaced(replaced_by: str, values: ValidationInfo) -> str: """ Check to make sure that the item has been deprecated if there is a replacement. """ - if replaced_by and not values.get("version_deprecated"): + if replaced_by and not values.data.get("version_deprecated"): raise FidesValidationError("Cannot be replaced without deprecation!") return replaced_by -def matching_parent_key(parent_key: FidesKey, values: Dict) -> FidesKey: +def matching_parent_key(parent_key: FidesKey, values: ValidationInfo) -> FidesKey: """ Confirm that the parent_key matches the parent parsed from the FidesKey. 
""" - fides_key = FidesKey.validate(values.get("fides_key", "")) + fides_key = validate_fides_key(values.data.get("fides_key", "")) split_fides_key = fides_key.split(".") # Check if it is a top-level resource @@ -163,7 +155,7 @@ def matching_parent_key(parent_key: FidesKey, values: Dict) -> FidesKey: parent_key_from_fides_key = ".".join(split_fides_key[:-1]) if parent_key_from_fides_key != parent_key: raise FidesValidationError( - "The parent_key ({0}) does match the parent parsed ({1}) from the fides_key ({2})!".format( + "The parent_key ({0}) does not match the parent parsed ({1}) from the fides_key ({2})!".format( parent_key, parent_key_from_fides_key, fides_key ) ) @@ -211,3 +203,19 @@ def valid_data_type(data_type_str: Optional[str]) -> Optional[str]: raise ValueError(f"The data type {data_type_str} is not supported.") return data_type_str + + +def validate_path_of_url(value: AnyUrl) -> str: + """Converts an AnyUrl to a string""" + return str(value) + + +AnyUrlString = Annotated[AnyUrl, AfterValidator(validate_path_of_url)] + + +def validate_path_of_http_url(value: AnyHttpUrl) -> str: + """Converts an AnyHttpUrl to a string""" + return str(value) + + +AnyHttpUrlString = Annotated[AnyHttpUrl, AfterValidator(validate_path_of_http_url)] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/conftest.py b/tests/conftest.py index 6dbebc9c..2f177e9c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,6 +8,10 @@ from fideslang import models +def assert_error_message_includes(exception_info, error_excerpt): + assert error_excerpt in str(exception_info.value) + + @pytest.fixture(scope="session") def resources_dict(): """ @@ -16,14 +20,14 @@ def resources_dict(): """ resources_dict: Dict[str, Any] = { "data_category": models.DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user.custom", parent_key="user", name="Custom Data Category", description="Custom Data Category", ), "dataset": models.Dataset( - organization_fides_key=1, + organization_fides_key="1", fides_key="test_sample_db_dataset", name="Sample DB Dataset", description="This is a Sample Database Dataset", @@ -53,13 +57,13 @@ def resources_dict(): ], ), "data_subject": models.DataSubject( - organization_fides_key=1, + organization_fides_key="1", fides_key="custom_subject", name="Custom Data Subject", description="Custom Data Subject", ), "data_use": models.DataUse( - organization_fides_key=1, + organization_fides_key="1", fides_key="custom_data_use", name="Custom Data Use", description="Custom Data Use", @@ -73,7 +77,7 @@ def resources_dict(): description="Test Organization", ), "policy": models.Policy( - organization_fides_key=1, + organization_fides_key="1", fides_key="test_policy", name="Test Policy", version="1.3", @@ -87,7 +91,7 @@ def resources_dict(): data_subjects=models.PrivacyRule(matches="ANY", values=[]), ), "system": models.System( - organization_fides_key=1, + organization_fides_key="1", fides_key="test_system", system_type="SYSTEM", name="Test System", @@ -112,7 +116,7 @@ def test_manifests(): "dataset": [ { "name": "Test Dataset 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "description": "Test Dataset 1", @@ -123,7 +127,7 @@ def test_manifests(): "system": [ { "name": "Test System 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 1", "fides_key": "some_system", @@ -135,7 
+139,7 @@ def test_manifests(): { "name": "Test Dataset 2", "description": "Test Dataset 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "fides_key": "another_dataset", @@ -145,7 +149,7 @@ def test_manifests(): "system": [ { "name": "Test System 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 2", "fides_key": "another_system", diff --git a/tests/fideslang/gvl/test_gvl.py b/tests/fideslang/gvl/test_gvl.py index abda6fd2..633d13b9 100644 --- a/tests/fideslang/gvl/test_gvl.py +++ b/tests/fideslang/gvl/test_gvl.py @@ -67,19 +67,15 @@ def test_feature_id_to_feature_name(): assert feature_id_to_feature_name(feature_id=1001) is None - def test_data_category_id_to_data_categories(): - assert data_category_id_to_data_categories(1) == [ - "user.device.ip_address" - ] + assert data_category_id_to_data_categories(1) == ["user.device.ip_address"] # let's test one other data category just to be comprehensive assert data_category_id_to_data_categories(5) == [ - "user.account", - "user.unique_id", - "user.device" - ] - + "user.account", + "user.unique_id", + "user.device", + ] # assert invalid categories raise KeyErrors with pytest.raises(KeyError): diff --git a/tests/fideslang/test_manifests.py b/tests/fideslang/test_manifests.py index 5310624c..9fab7e04 100644 --- a/tests/fideslang/test_manifests.py +++ b/tests/fideslang/test_manifests.py @@ -68,7 +68,7 @@ def test_union_manifests(test_manifests): "name": "Test Dataset 1", "description": "Test Dataset 1", "fides_key": "some_dataset", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "datasetTables": [], @@ -77,7 +77,7 @@ def test_union_manifests(test_manifests): "name": "Test Dataset 2", "description": "Test Dataset 2", "fides_key": "another_dataset", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "datasetTables": [], @@ -86,14 +86,14 @@ def test_union_manifests(test_manifests): "system": [ { "name": "Test System 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 1", "fides_key": "some_system", }, { "name": "Test System 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 2", "fides_key": "another_system", @@ -122,7 +122,7 @@ def test_ingest_manifests(ingestion_manifest_directory): assert sorted(actual_result["dataset"], key=lambda x: x["name"]) == [ { "name": "Test Dataset 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "description": "Test Dataset 1", @@ -132,7 +132,7 @@ def test_ingest_manifests(ingestion_manifest_directory): { "name": "Test Dataset 2", "description": "Test Dataset 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "fides_key": "another_dataset", @@ -142,14 +142,14 @@ def test_ingest_manifests(ingestion_manifest_directory): assert sorted(actual_result["system"], key=lambda x: x["name"]) == [ { "name": "Test System 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 1", "fides_key": "some_system", }, { "name": "Test System 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", 
"description": "Test System 2", "fides_key": "another_system", diff --git a/tests/fideslang/test_models.py b/tests/fideslang/test_models.py index 249ef012..ff96123f 100644 --- a/tests/fideslang/test_models.py +++ b/tests/fideslang/test_models.py @@ -5,11 +5,13 @@ from fideslang import DataFlow, Dataset, Organization, PrivacyDeclaration, System from fideslang.models import ( ContactDetails, + Cookies, DataResponsibilityTitle, DatasetCollection, DatasetField, DataUse, ) +from tests.conftest import assert_error_message_includes pytestmark = mark.unit @@ -55,16 +57,25 @@ def test_dataflow_valid(self) -> None: ) def test_dataflow_user_fides_key_no_user_type(self) -> None: - with raises(ValueError): + with raises(ValueError) as exc: assert DataFlow(fides_key="user", type="system") + assert_error_message_includes( + exc, "The 'user' fides_key is required for, and requires, the type 'user'" + ) def test_dataflow_user_type_no_user_fides_key(self) -> None: - with raises(ValueError): + with raises(ValueError) as exc: assert DataFlow(fides_key="test_system_1", type="user") + assert_error_message_includes( + exc, "The 'user' fides_key is required for, and requires, the type 'user'" + ) def test_dataflow_invalid_type(self) -> None: - with raises(ValueError): + with raises(ValueError) as exc: assert DataFlow(fides_key="test_system_1", type="invalid") + assert_error_message_includes( + exc, "'type' must be one of dataset, system, user" + ) class TestPrivacyDeclaration: @@ -85,7 +96,7 @@ class TestSystem: # We need to update these tests to assert that the provided args are actually being set # as attributes on the System instance that's instantiated. def test_system_valid(self) -> None: - assert System( + system = System( description="Test Policy", egress=[ DataFlow( @@ -104,7 +115,7 @@ def test_system_valid(self) -> None: ], meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", cookies=[{"name": "test_cookie"}], privacy_declarations=[ PrivacyDeclaration( @@ -122,9 +133,53 @@ def test_system_valid(self) -> None: system_type="SYSTEM", tags=["some", "tags"], ) + assert system.name == "Test System" + assert system.fides_key == "test_system" + assert system.description == "Test Policy" + assert system.egress == [ + DataFlow( + fides_key="test_system_2", + type="system", + data_categories=[], + ) + ] + assert system.ingress == [ + DataFlow( + fides_key="test_system_3", + type="system", + data_categories=[], + ) + ] + assert system.meta == {"some": "meta stuff"} + assert system.organization_fides_key == "1" + assert system.cookies == [Cookies(name="test_cookie", path=None, domain=None)] + assert system.system_type == "SYSTEM" + assert system.tags == ["some", "tags"] + assert system.privacy_declarations == [ + PrivacyDeclaration( + name="declaration-name", + data_categories=[], + data_use="provide", + data_subjects=[], + dataset_references=None, + egress=["test_system_2"], + ingress=["test_system_3"], + features=[], + flexible_legal_basis_for_processing=True, + legal_basis_for_processing=None, + impact_assessment_location=None, + retention_period=None, + processes_special_category_data=False, + special_category_legal_basis=None, + data_shared_with_third_parties=False, + third_parties=None, + shared_categories=[], + cookies=[Cookies(name="test_cookie", path="/", domain="example.com")], + ) + ] def test_system_valid_nested_meta(self) -> None: - assert System( + system = System( description="Test Policy", egress=[ DataFlow( @@ -154,7 +209,7 @@ def 
test_system_valid_nested_meta(self) -> None: }, }, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -168,6 +223,18 @@ def test_system_valid_nested_meta(self) -> None: system_type="SYSTEM", tags=["some", "tags"], ) + assert system.meta == { + "some": "meta stuff", + "some": { + "nested": "meta stuff", + "more nested": "meta stuff", + }, + "some more": { + "doubly": { + "nested": "meta stuff", + } + }, + } def test_system_valid_no_meta(self) -> None: system = System( @@ -189,7 +256,7 @@ def test_system_valid_no_meta(self) -> None: ], # purposefully omitting the `meta` property to ensure it's effectively optional name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -211,7 +278,7 @@ def test_system_valid_no_egress_or_ingress(self) -> None: fides_key="test_system", meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -225,7 +292,7 @@ def test_system_valid_no_egress_or_ingress(self) -> None: ) def test_system_no_egress(self) -> None: - with raises(ValueError): + with raises(ValueError) as exc: assert System( description="Test Policy", fides_key="test_system", @@ -238,7 +305,7 @@ def test_system_no_egress(self) -> None: ], meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -252,9 +319,13 @@ def test_system_no_egress(self) -> None: system_type="SYSTEM", tags=["some", "tags"], ) + assert_error_message_includes( + exc, + "PrivacyDeclaration 'declaration-name' defines egress with one or more resources and is applied to the System 'test_system', which does not itself define any egress.", + ) def test_system_no_ingress(self) -> None: - with raises(ValueError): + with raises(ValueError) as exc: assert System( description="Test Policy", egress=[ @@ -267,7 +338,7 @@ def test_system_no_ingress(self) -> None: fides_key="test_system", meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -281,6 +352,10 @@ def test_system_no_ingress(self) -> None: system_type="SYSTEM", tags=["some", "tags"], ) + assert_error_message_includes( + exc, + "PrivacyDeclaration 'declaration-name' defines ingress with one or more resources and is applied to the System 'test_system', which does not itself define any ingress.", + ) def test_system_user_ingress_valid(self) -> None: assert System( @@ -295,7 +370,7 @@ def test_system_user_ingress_valid(self) -> None: ], meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -310,9 +385,9 @@ def test_system_user_ingress_valid(self) -> None: ) def test_expanded_system(self): - assert System( + system = System( fides_key="test_system", - organization_fides_key=1, + organization_fides_key="1", tags=["some", "tags"], name="Exponential Interactive, Inc d/b/a VDX.tv", description="My system test", @@ -405,6 +480,7 @@ def test_expanded_system(self): } ], ) + print(f"dumped={system.model_dump()}") def test_flexible_legal_basis_default(self): pd = PrivacyDeclaration( diff --git a/tests/fideslang/test_parse.py b/tests/fideslang/test_parse.py index 
b94e752b..5c90cd74 100644 --- a/tests/fideslang/test_parse.py +++ b/tests/fideslang/test_parse.py @@ -1,19 +1,18 @@ import pytest -from fideslang import models -from fideslang import parse +from fideslang import models, parse @pytest.mark.unit def test_parse_manifest(): expected_result = models.DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="some_resource", name="Test resource 1", description="Test Description", ) test_dict = { - "organization_fides_key": 1, + "organization_fides_key": "1", "fides_key": "some_resource", "name": "Test resource 1", "description": "Test Description", @@ -26,7 +25,7 @@ def test_parse_manifest(): def test_parse_manifest_no_fides_key_validation_error(): with pytest.raises(SystemExit): test_dict = { - "organization_fides_key": 1, + "organization_fides_key": "1", "name": "Test resource 1", "description": "Test Description", } @@ -38,7 +37,7 @@ def test_parse_manifest_no_fides_key_validation_error(): def test_parse_manifest_resource_type_error(): with pytest.raises(SystemExit): test_dict = { - "organization_fides_key": 1, + "organization_fides_key": "1", "fides_key": "some_resource", "name": "Test resource 1", "description": "Test Description", diff --git a/tests/fideslang/test_relationships.py b/tests/fideslang/test_relationships.py index 2e37dfaa..bd0381a3 100644 --- a/tests/fideslang/test_relationships.py +++ b/tests/fideslang/test_relationships.py @@ -103,7 +103,7 @@ def test_find_referenced_fides_keys_1(self) -> None: assert referenced_keys == set(expected_referenced_key) def test_find_referenced_fides_keys_2(self) -> None: - test_system = System.construct( + test_system = System.model_construct( name="test_dc", fides_key="test_dc", description="test description", @@ -149,7 +149,7 @@ def test_get_referenced_missing_keys(self): ), ], system=[ - System.construct( + System.model_construct( name="test_system", fides_key="test_system", description="test description", diff --git a/tests/fideslang/test_validation.py b/tests/fideslang/test_validation.py index 2131083f..698d983a 100644 --- a/tests/fideslang/test_validation.py +++ b/tests/fideslang/test_validation.py @@ -1,5 +1,5 @@ import pytest -from pydantic import ValidationError +from pydantic import TypeAdapter, ValidationError from fideslang.models import ( CollectionMeta, @@ -21,7 +21,14 @@ PrivacyRule, System, ) -from fideslang.validation import FidesKey, FidesValidationError, valid_data_type +from fideslang.validation import ( + AnyHttpUrlString, + AnyUrlString, + FidesValidationError, + valid_data_type, + validate_fides_key, +) +from tests.conftest import assert_error_message_includes DEFAULT_TAXONOMY_CLASSES = [DataCategory, DataUse, DataSubject] @@ -33,33 +40,39 @@ class TestVersioning: @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_default_no_versions_error(self, TaxonomyClass): """There should be version info for default items.""" - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", is_default=True, ) + assert_error_message_includes( + exc, "Default items must have version information!" 
+ ) @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_not_default_no_versions_error(self, TaxonomyClass): """There shouldn't be version info on a non-default item.""" - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", version_added="1.2.3", ) + assert_error_message_includes( + exc, "Non-default items can't have version information!" + ) @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_deprecated_when_added(self, TaxonomyClass): """Item can't be deprecated in a version earlier than it was added.""" - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -67,13 +80,16 @@ def test_deprecated_when_added(self, TaxonomyClass): version_added="1.2", version_deprecated="1.2", ) + assert_error_message_includes( + exc, "Deprecated version number can't be the same as the version added!" + ) @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_deprecated_after_added(self, TaxonomyClass): """Item can't be deprecated in a version earlier than it was added.""" - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -81,13 +97,16 @@ def test_deprecated_after_added(self, TaxonomyClass): version_added="1.2.3", version_deprecated="0.2", ) + assert_error_message_includes( + exc, "Deprecated version number can't be earlier than version added!" + ) @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_built_from_dict_with_empty_versions(self, TaxonomyClass) -> None: """Try building from a dictionary with explicit None values.""" - TaxonomyClass.parse_obj( + TaxonomyClass.model_validate( { - "organization_fides_key": 1, + "organization_fides_key": "1", "fides_key": "user", "name": "Custom Test Data", "description": "Custom Test Data Category", @@ -101,8 +120,8 @@ def test_built_from_dict_with_empty_versions(self, TaxonomyClass) -> None: @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_built_with_empty_versions(self, TaxonomyClass) -> None: """Try building directly with explicit None values.""" - TaxonomyClass( - organization_fides_key=1, + tc = TaxonomyClass( + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -111,26 +130,31 @@ def test_built_with_empty_versions(self, TaxonomyClass) -> None: replaced_by=None, is_default=False, ) + assert tc.version_added is None + assert not tc.is_default @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_deprecated_not_added(self, TaxonomyClass): """Can't be deprecated without being added in an earlier version.""" - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", is_default=True, version_deprecated="0.2", ) + assert_error_message_includes( + exc, "Default items must have version information!" 
+ ) @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_replaced_not_deprecated(self, TaxonomyClass): """If the field is replaced, it must also be deprecated.""" - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -138,12 +162,13 @@ def test_replaced_not_deprecated(self, TaxonomyClass): version_added="1.2.3", replaced_by="some.field", ) + assert_error_message_includes(exc, "Cannot be replaced without deprecation!") @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_replaced_and_deprecated(self, TaxonomyClass): """If the field is replaced, it must also be deprecated.""" - assert TaxonomyClass( - organization_fides_key=1, + tc = TaxonomyClass( + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -152,63 +177,71 @@ def test_replaced_and_deprecated(self, TaxonomyClass): version_deprecated="1.3", replaced_by="some.field", ) + assert tc.version_added == "1.2.3" + assert tc.version_deprecated == "1.3" + assert tc.replaced_by == "some.field" @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_version_error(self, TaxonomyClass): """Check that versions are validated.""" - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", is_default=True, version_added="a.2.3", ) + assert_error_message_includes( + exc, "Field 'version_added' does not have a valid version" + ) @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_versions_valid(self, TaxonomyClass): """Check that versions are validated.""" - assert TaxonomyClass( - organization_fides_key=1, + tc = TaxonomyClass( + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", is_default=True, version_added="1.2.3", ) + assert tc.version_added == "1.2.3" @pytest.mark.unit def test_collections_duplicate_fields_error(): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: DatasetCollection( name="foo", description="Fides Generated Description for Table: foo", data_categories=[], fields=[ DatasetField( - name=1, + name="1", description="Fides Generated Description for Column: 1", data_categories=[], ), DatasetField( - name=2, + name="2", description="Fides Generated Description for Column: 1", data_categories=[], ), DatasetField( - name=1, + name="1", description="Fides Generated Description for Column: 1", data_categories=[], ), ], ) + assert_error_message_includes(exc, "Duplicate entries found: [1]") @pytest.mark.unit def test_dataset_duplicate_collections_error(): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: Dataset( name="ds", fides_key="ds", @@ -221,7 +254,7 @@ def test_dataset_duplicate_collections_error(): data_categories=[], fields=[ DatasetField( - name=1, + name="1", description="Fides Generated Description for Column: 1", data_categories=[], ), @@ -233,7 +266,7 @@ def test_dataset_duplicate_collections_error(): data_categories=[], fields=[ DatasetField( - name=4, + name="4", description="Fides Generated Description for Column: 4", data_categories=[], ), @@ -241,12 +274,13 @@ def 
test_dataset_duplicate_collections_error(): ), ], ) + assert_error_message_includes(exc, "Duplicate entries found: [foo]") @pytest.mark.unit def test_top_level_resource(): DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -256,117 +290,125 @@ def test_top_level_resource(): @pytest.mark.unit def test_fides_key_doesnt_match_stated_parent_key(): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user.custom_test_data", name="Custom Test Data", description="Custom Test Data Category", parent_key="user.account", ) - assert DataCategory + assert_error_message_includes( + exc, + "The parent_key (user.account) does not match the parent parsed (user) from the fides_key (user.custom_test_data)!", + ) @pytest.mark.unit def test_fides_key_matches_stated_parent_key(): - DataCategory( - organization_fides_key=1, + dc = DataCategory( + organization_fides_key="1", fides_key="user.account.custom_test_data", name="Custom Test Data", description="Custom Test Data Category", parent_key="user.account", ) - assert DataCategory + assert dc.fides_key == "user.account.custom_test_data" + assert dc.parent_key == "user.account" @pytest.mark.unit def test_no_parent_key_but_fides_key_contains_parent_key(): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user.custom_test_data", name="Custom Test Data", description="Custom Test Data Category", ) - assert DataCategory + assert_error_message_includes( + exc, "The parent_key (None) does not match the parent parsed" + ) @pytest.mark.unit def test_fides_key_with_carets(): - DataCategory( - organization_fides_key=1, + dc = DataCategory( + organization_fides_key="1", fides_key="", name="Example valid key with brackets", description="This key contains a <> which is valid", ) - assert DataCategory + assert dc.fides_key == "" @pytest.mark.unit def test_invalid_chars_in_fides_key(): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="!", name="Example invalid key", description="This key contains a ! 
so it is invalid", ) - assert DataCategory + assert_error_message_includes( + exc, "FidesKeys must only contain alphanumeric characters" + ) @pytest.mark.unit def test_create_valid_data_category(): - DataCategory( - organization_fides_key=1, + dc = DataCategory( + organization_fides_key="1", fides_key="user.custom_test_data", name="Custom Test Data", description="Custom Test Data Category", parent_key="user", ) - assert DataCategory + assert dc.name == "Custom Test Data" @pytest.mark.unit def test_circular_dependency_data_category(): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="User Data", description="Test Data Category", parent_key="user", ) - assert True + assert_error_message_includes(exc, "FidesKey cannot self-reference!") @pytest.mark.unit def test_create_valid_data_use(): - DataUse( - organization_fides_key=1, + du = DataUse( + organization_fides_key="1", fides_key="provide.service", name="Provide the Product or Service", parent_key="provide", description="Test Data Use", ) - assert True + assert du.name == "Provide the Product or Service" @pytest.mark.unit def test_circular_dependency_data_use(): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: DataUse( - organization_fides_key=1, + organization_fides_key="1", fides_key="provide.service", name="Provide the Product or Service", description="Test Data Use", parent_key="provide.service", ) - assert True + assert_error_message_includes(exc, "FidesKey cannot self-reference!") @pytest.mark.unit @pytest.mark.parametrize("fides_key", ["foo_bar", "foo-bar", "foo.bar", "foo_bar_8"]) -def test_fides_model_valid(fides_key: str): +def test_fides_model_fides_key_valid(fides_key: str): fides_key = FidesModel(fides_key=fides_key, name="Foo Bar") assert fides_key @@ -375,8 +417,11 @@ def test_fides_model_valid(fides_key: str): @pytest.mark.parametrize("fides_key", ["foo/bar", "foo%bar", "foo^bar"]) def test_fides_model_fides_key_invalid(fides_key): """Check for a bunch of different possible bad characters here.""" - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: FidesModel(fides_key=fides_key) + assert_error_message_includes( + exc, "FidesKeys must only contain alphanumeric characters" + ) @pytest.mark.unit @@ -387,22 +432,24 @@ def test_valid_privacy_rule(): @pytest.mark.unit def test_invalid_fides_key_privacy_rule(): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: PrivacyRule(matches="ANY", values=["foo^bar"]) - assert True + assert_error_message_includes( + exc, "FidesKeys must only contain alphanumeric characters" + ) @pytest.mark.unit def test_invalid_matches_privacy_rule(): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: PrivacyRule(matches="AN", values=["foo_bar"]) - assert True + assert_error_message_includes(exc, "Input should be 'ANY'") @pytest.mark.unit def test_valid_policy_rule(): assert PolicyRule( - organization_fides_key=1, + organization_fides_key="1", policyId=1, fides_key="test_policy", name="Test Policy", @@ -416,20 +463,19 @@ def test_valid_policy_rule(): @pytest.mark.unit def test_valid_policy(): Policy( - organization_fides_key=1, + organization_fides_key="1", fides_key="test_policy", name="Test Policy", version="1.3", description="Test Policy", rules=[], ) - assert True @pytest.mark.unit def test_create_valid_system(): System( - 
organization_fides_key=1, + organization_fides_key="1", fides_key="test_system", system_type="SYSTEM", name="Test System", @@ -450,47 +496,45 @@ def test_create_valid_system(): ), ], ) - assert True - - - @pytest.mark.unit def test_fides_key_validate_bad_key(): with pytest.raises(FidesValidationError): - FidesKey.validate("hi!") + validate_fides_key("hi!") @pytest.mark.unit def test_fides_key_validate_good_key(): - FidesKey.validate("hello_test_file.txt") + validate_fides_key("hello_test_file.txt") @pytest.mark.unit class TestFidesDatasetReference: def test_dataset_invalid(self): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: FidesDatasetReference(dataset="bad fides key!", field="test_field") + assert_error_message_includes( + exc, "FidesKeys must only contain alphanumeric characters" + ) def test_invalid_direction(self): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: FidesDatasetReference( dataset="test_dataset", field="test_field", direction="backwards" ) + assert_error_message_includes(exc, "Input should be 'from' or 'to'") def valid_dataset_reference_to(self): ref = FidesDatasetReference( dataset="test_dataset", field="test_field", direction="to" ) - assert ref def valid_dataset_reference_from(self): ref = FidesDatasetReference( dataset="test_dataset", field="test_field", direction="from" ) - assert ref def valid_dataset_reference_no_direction(self): @@ -617,8 +661,8 @@ def test_valid_length(self): class TestValidateDatasetField: - def test_return_all_elements_not_string_field(self): - with pytest.raises(ValidationError): + def test_return_all_elements_not_array_field(self): + with pytest.raises(ValidationError) as exc: DatasetField( name="test_field", fides_meta=FidesMeta( @@ -631,6 +675,10 @@ def test_return_all_elements_not_string_field(self): read_only=None, ), ) + assert_error_message_includes( + exc, + "The 'return_all_elements' attribute can only be specified on array fields.", + ) def test_return_all_elements_on_array_field(self): assert DatasetField( @@ -662,8 +710,8 @@ def test_data_categories_at_object_level(self): ), fields=[DatasetField(name="nested_field")], ) - assert "Object field 'test_field' cannot have specified data_categories" in str( - exc + assert_error_message_includes( + exc, "Object field 'test_field' cannot have specified data_categories" ) def test_object_field_conflicting_types(self): @@ -682,9 +730,8 @@ def test_object_field_conflicting_types(self): ), fields=[DatasetField(name="nested_field")], ) - assert ( - "The data type 'string' on field 'test_field' is not compatible with specified sub-fields." 
- in str(exc) + assert_error_message_includes( + exc, "The data type 'string' on field 'test_field' is not compatible with" ) def test_data_categories_on_nested_fields(self): @@ -704,14 +751,130 @@ def test_data_categories_on_nested_fields(self): class TestCollectionMeta: def test_invalid_collection_key(self): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: CollectionMeta(after=[FidesCollectionKey("test_key")]) + assert_error_message_includes( + exc, "FidesCollection must be specified in the form 'FidesKey.FidesKey'" + ) def test_collection_key_has_too_many_components(self): - with pytest.raises(ValidationError): + with pytest.raises(ValidationError) as exc: CollectionMeta( after=[FidesCollectionKey("test_dataset.test_collection.test_field")] ) + assert_error_message_includes( + exc, "FidesCollection must be specified in the form 'FidesKey.FidesKey'" + ) def test_valid_collection_key(self): CollectionMeta(after=[FidesCollectionKey("test_dataset.test_collection")]) + + +class TestAnyUrlString: + def test_valid_url(self): + assert AnyUrlString("https://www.example.com/") + + def test_invalid_url(self): + with pytest.raises(ValidationError) as exc: + AnyUrlString("invalid_url") + + assert_error_message_includes(exc, "Input should be a valid URL") + + def test_validate_url(self): + assert ( + TypeAdapter(AnyUrlString).validate_python("ftp://user:password@host") + == "ftp://user:password@host/" + ), "Trailing slash added" + assert ( + TypeAdapter(AnyUrlString).validate_python("ftp:user:password@host/") + == "ftp://user:password@host/" + ), "Format corrected" + assert ( + TypeAdapter(AnyUrlString).validate_python( + "ftp://user:password@host:3341/path" + ) + == "ftp://user:password@host:3341/path" + ), "No change" + assert ( + TypeAdapter(AnyUrlString).validate_python("https://www.example.com/hello") + == "https://www.example.com/hello" + ), "No change" + assert ( + TypeAdapter(AnyUrlString).validate_python("https://www.example.com/hello/") + == "https://www.example.com/hello/" + ), "No change" + + def test_system_urls(self): + system = System( + description="Test Policy", + fides_key="test_system", + name="Test System", + organization_fides_key="1", + privacy_declarations=[], + system_type="SYSTEM", + privacy_policy="https://www.example.com", + ) + + # This is a string and not a Url type, because privacy_policy is using custom type AnyUrlString. + # It also adds a trailing slash to example.com + assert system.privacy_policy == "https://www.example.com/" + + system = System( + description="Test Policy", + fides_key="test_system", + name="Test System", + organization_fides_key="1", + privacy_declarations=[], + system_type="SYSTEM", + privacy_policy="https://policy.samsungrs.com/consent/eu/nsc/privacy_policy_de.html", + legitimate_interest_disclosure_url="https://policy.samsungrs.com/consent/eu/nsc/privacy_policy_de.html#gdpr-article", + ) + + # This is a string and not a Url type, because privacy_policy is using custom type AnyUrlString. 
+ # No trailing slash is added + assert ( + system.privacy_policy + == "https://policy.samsungrs.com/consent/eu/nsc/privacy_policy_de.html" + ) + assert ( + system.legitimate_interest_disclosure_url + == "https://policy.samsungrs.com/consent/eu/nsc/privacy_policy_de.html#gdpr-article" + ) + + +class TestAnyHttpUrlString: + def test_valid_url(self): + assert AnyHttpUrlString("https://www.example.com") + + def test_invalid_url(self): + with pytest.raises(ValidationError) as exc: + AnyHttpUrlString("invalid_url") + + assert_error_message_includes(exc, "Input should be a valid URL") + + def test_validate_path_of_url(self): + assert ( + TypeAdapter(AnyHttpUrlString).validate_python("https://www.example.com") + == "https://www.example.com/" + ), "Trailing slash added" + assert ( + TypeAdapter(AnyHttpUrlString).validate_python("https://www.example.com/") + == "https://www.example.com/" + ), "No change" + assert ( + TypeAdapter(AnyHttpUrlString).validate_python( + "https://www.example.com/hello" + ) + == "https://www.example.com/hello" + ), "No change" + assert ( + TypeAdapter(AnyHttpUrlString).validate_python( + "https://www.example.com/hello/" + ) + == "https://www.example.com/hello/" + ), "No change" + + with pytest.raises(ValidationError) as exc: + TypeAdapter(AnyHttpUrlString).validate_python("ftp://user:password@host") + + assert_error_message_includes(exc, "URL scheme should be 'http' or 'https'")
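The pattern repeated throughout this patch is `with pytest.raises(...) as exc:` followed by `assert_error_message_includes(exc, "...")`, with the helper imported from `tests/conftest.py` (not part of this diff). A minimal sketch of what that helper presumably does, with the exact signature assumed:

import pytest


def assert_error_message_includes(exc: pytest.ExceptionInfo, message: str) -> None:
    # str(exc.value) renders the full Pydantic ValidationError, including any
    # custom validator text, so a substring check is sufficient for these tests.
    assert message in str(exc.value)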
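Most of the remaining edits are mechanical Pydantic V1-to-V2 renames: `System.construct` becomes `System.model_construct`, `parse_obj` becomes `model_validate`, and `dict()` becomes `model_dump()`. Passing `organization_fides_key="1"` (and `DatasetField` names such as `"1"`) as strings is likewise a V2 requirement, since V2 no longer coerces integers into string-typed fields. A short side-by-side sketch using values taken from the tests above:

from fideslang.models import DataCategory, System

# model_construct() builds a model without running validation (V1: construct()),
# so fields are accepted as-is and required-field checks are skipped.
unvalidated = System.model_construct(
    name="test_system",
    fides_key="test_system",
    description="test description",
)

# model_validate() validates a plain dict (V1: parse_obj()).
dc = DataCategory.model_validate(
    {
        "organization_fides_key": "1",
        "fides_key": "user",
        "name": "Custom Test Data",
        "description": "Custom Test Data Category",
    }
)

# model_dump() serializes back to a dict (V1: dict()).
assert dc.model_dump()["fides_key"] == "user"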
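The expected error strings also change wording under V2: enum and literal mismatches are reported as "Input should be 'ANY'" and "Input should be 'from' or 'to'" rather than the V1 phrasing, which is what the updated `PrivacyRule` and `FidesDatasetReference` assertions check. The snippet below reproduces that message format with a stand-alone model (not part of fideslang):

from typing import Literal

import pytest
from pydantic import BaseModel, ValidationError


class Rule(BaseModel):
    # Stand-alone illustration; PrivacyRule constrains `matches` to a fixed set of values.
    matches: Literal["ANY", "ALL", "NONE", "OTHER"]


with pytest.raises(ValidationError) as exc:
    Rule(matches="AN")

# Pydantic V2 renders literal errors as "Input should be 'ANY', 'ALL', 'NONE' or 'OTHER'".
assert "Input should be 'ANY'" in str(exc.value)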
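`test_fides_key_validate_bad_key` now calls a plain `validate_fides_key()` function instead of `FidesKey.validate()`, in line with Pydantic V2 replacing custom-type classmethod validators with annotated validators. A plausible sketch of that shape, with the regex and message assumed from the tests above (the real definition lives in fideslang/validation.py and is not shown in this diff):

import re
from typing import Annotated

from pydantic import AfterValidator

from fideslang.validation import FidesValidationError

# Assumed pattern: alphanumerics plus ".", "_", "-", "<" and ">", per the
# "key with brackets" test above.
FIDES_KEY_PATTERN = re.compile(r"^[a-zA-Z0-9_.<>-]+$")


def validate_fides_key(value: str) -> str:
    if not FIDES_KEY_PATTERN.fullmatch(value):
        raise FidesValidationError(
            "FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'."
        )
    return value


# In V2 the reusable type is an Annotated alias rather than a str subclass.
FidesKey = Annotated[str, AfterValidator(validate_fides_key)]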
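The new `TestAnyUrlString` and `TestAnyHttpUrlString` classes pin down how URL-typed fields behave after the upgrade: Pydantic V2's `AnyUrl`/`AnyHttpUrl` return `Url` objects and normalize input (a bare host gains a trailing slash, and `AnyHttpUrl` rejects non-HTTP schemes), while fideslang fields such as `privacy_policy` remain plain strings. One way to get that behaviour, assuming the definitions in fideslang/validation.py look roughly like this:

from typing import Annotated

from pydantic import AfterValidator, AnyHttpUrl, AnyUrl, TypeAdapter

# Validate as a URL, then convert the resulting Url object back to str so that
# model fields compare equal to plain strings, as the assertions above expect.
AnyUrlString = Annotated[AnyUrl, AfterValidator(str)]
AnyHttpUrlString = Annotated[AnyHttpUrl, AfterValidator(str)]

# A bare host gains a trailing slash during normalization; URLs with a path are unchanged.
assert (
    TypeAdapter(AnyUrlString).validate_python("https://www.example.com")
    == "https://www.example.com/"
)
assert (
    TypeAdapter(AnyHttpUrlString).validate_python("https://www.example.com/hello")
    == "https://www.example.com/hello"
)

This is also why `System(privacy_policy="https://www.example.com", ...)` round-trips as the string "https://www.example.com/" in `test_system_urls`.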