From 18637d0058354c86361a278cba16b38f0d4666dd Mon Sep 17 00:00:00 2001 From: Tom Reitz Date: Mon, 10 Mar 2025 10:08:15 -0500 Subject: [PATCH 1/2] fix/self_reference_validation This PR fixes two bugs that prevented self-references (such as `objectiveAssessment.parentObjectiveAssessmentReference`) from validating correctly: 1. reference fields were resolved to an endpoint by simply removing the `Reference` suffix, which obviously doesn't work for `parentObjectiveReference`s 2. cache keys (which are used to efficiently look up whether a referenced payload has been found) were sensitive to dict key ordering This PR fixes both bugs and ensures that self-references validate correctly. --- lightbeam/validate.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lightbeam/validate.py b/lightbeam/validate.py index 3befc06..2e48d82 100644 --- a/lightbeam/validate.py +++ b/lightbeam/validate.py @@ -146,7 +146,7 @@ def load_references_structure(self, swagger, definition): prefixes_to_remove = ["#/definitions/", "#/components/schemas/"] for k in schema["properties"].keys(): if k.endswith("Reference"): - original_endpoint = util.pluralize_endpoint(k.replace("Reference", "")) + original_endpoint = self.resolve_reference_to_endpoint(k) # this deals with the fact that an educationOrganizationReference may be to a school, LEA, etc.: endpoints_to_check = self.EDFI_GENERICS_TO_RESOURCES_MAPPING.get(original_endpoint, [original_endpoint]) @@ -409,7 +409,7 @@ def has_invalid_references(self, payload, path=""): if value!="": return value elif isinstance(payload[k], dict) and k.endswith("Reference"): is_valid_reference = False - original_endpoint = util.pluralize_endpoint(k.replace("Reference","")) + original_endpoint = self.resolve_reference_to_endpoint(k) # this deals with the fact that an educationOrganizationReference may be to a school, LEA, etc.: endpoints_to_check = self.EDFI_GENERICS_TO_RESOURCES_MAPPING.get(original_endpoint, 
[original_endpoint]) @@ -434,6 +434,19 @@ def has_invalid_references(self, payload, path=""): return f"payload contains an invalid {k} " + (" (at "+path+"): " if path!="" else ": ") + json.dumps(params) return "" + @staticmethod + def resolve_reference_to_endpoint(referenceName): + endpoint = referenceName + # remove final "Reference" + if endpoint.endswith("Reference"): + endpoint = endpoint[:-1*len("Reference")] + # remove leading "parent" if whole endpoint name isn't just "parent" + # (this handles things like parentObjectiveAssessmentReference) + if endpoint.startswith("parent") and endpoint!="parent": + endpoint = endpoint[len("parent"):] + endpoint = endpoint[0].lower() + endpoint[1:] + return util.pluralize_endpoint(endpoint) + # Tells you if a specified descriptor value is valid or not def is_valid_descriptor_value(self, namespace, codeValue): for row in self.lightbeam.api.descriptor_values: @@ -444,7 +457,9 @@ def is_valid_descriptor_value(self, namespace, codeValue): @staticmethod def get_cache_key(payload): cache_key = '' - for k in payload.keys(): + payload_keys = list(payload.keys()) + payload_keys.sort() + for k in payload_keys: cache_key += f"{payload[k]}~~~" return cache_key From b8ad7f9f01cd35fb7537cd0ddb05abca59df86bf Mon Sep 17 00:00:00 2001 From: Tom Reitz Date: Mon, 10 Mar 2025 10:18:03 -0500 Subject: [PATCH 2/2] fix line_number bugs --- lightbeam/validate.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lightbeam/validate.py b/lightbeam/validate.py index 2e48d82..9e738c6 100644 --- a/lightbeam/validate.py +++ b/lightbeam/validate.py @@ -217,10 +217,12 @@ async def validate_endpoint(self, endpoint): for file_name in data_files: self.logger.info(f"validating {file_name} against {definition} schema...") + file_counter = 0 with open(file_name) as file: for i, line in enumerate(file): line_number = i + 1 total_counter += 1 + file_counter += 1 data = line.strip() tasks.append(asyncio.create_task( @@ -255,7 +257,7 @@ 
async def validate_endpoint(self, endpoint): num_others = self.lightbeam.num_errors - self.MAX_VALIDATION_ERRORS_TO_DISPLAY if self.lightbeam.num_errors > self.MAX_VALIDATION_ERRORS_TO_DISPLAY: self.logger.warn(f"... and {num_others} others!") - self.logger.warn(f"... VALIDATION ERRORS on {self.lightbeam.num_errors} of {line_counter} lines in {file_name}; see details above.") + self.logger.warn(f"... VALIDATION ERRORS on {self.lightbeam.num_errors} of {file_counter} lines in {file_name}; see details above.") # free up some memory self.uniqueness_hashes = {} @@ -298,7 +300,7 @@ async def do_validate_payload(self, endpoint, file_name, data, line_number): if "uniqueness" in self.validation_methods: error_message = self.violates_uniqueness(endpoint, payload, path="") if error_message != "": - self.log_validation_error(endpoint, file_name, line_counter, "uniqueness", error_message) + self.log_validation_error(endpoint, file_name, line_number, "uniqueness", error_message) # check references values are valid if "references" in self.validation_methods and "Descriptor" not in endpoint: # Descriptors have no references