Skip to content

allow for named custom array types #919

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 48 additions & 4 deletions schema_salad/avro/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,47 @@
return cast(Schema, self.get_prop("items"))


class NamedArraySchema(NamedSchema):
    """Avro array schema that carries a name (and optional namespace)."""

    # NOTE(review): the Codecov annotations on this pull request reported the
    # added lines of this class as not covered by tests.

    def __init__(
        self,
        items: JsonDataType,
        names: Names,
        name: str,
        namespace: Optional[str] = None,
        doc: Optional[Union[str, list[str]]] = None,
        other_props: Optional[PropsType] = None,
    ) -> None:
        """
        Build a named array schema.

        :param items: the item type, either the name of a schema known to
            ``names`` or raw JSON data to be parsed as a schema
        :param names: the name tracker used to look up and parse schemas
        :param name: the name given to this array schema
        :param namespace: optional namespace forwarded to the parent ctor
        :param doc: optional documentation, stored as the "doc" property
        :param other_props: additional properties forwarded to the parent ctor
        :raises SchemaParseException: if ``names`` is None or if ``items``
            cannot be turned into a schema
        """
        # Initialise the named-schema base with the fixed "array" type.
        NamedSchema.__init__(self, "array", name, namespace, names, other_props)

        if names is None:
            raise SchemaParseException("Must provide Names.")

        # A string that names a known schema is looked up directly;
        # everything else is parsed as a fresh schema definition.
        refers_to_known_name = isinstance(items, str) and names.has_name(items, None)
        if refers_to_known_name:
            element_schema = cast(Schema, names.get_name(items, None))
        else:
            try:
                element_schema = make_avsc_object(items, names)
            except Exception as err:
                raise SchemaParseException(
                    f"Items schema ({items}) not a valid Avro schema: {err}. "
                    f"Known names: {list(names.names.keys())})."
                ) from err

        self.set_prop("items", element_schema)

        # Documentation is only recorded when it was actually supplied.
        if doc is not None:
            self.set_prop("doc", doc)

    # read-only properties
    @property
    def items(self) -> Schema:
        """Return the schema stored under the "items" property."""
        stored = self.get_prop("items")
        return cast(Schema, stored)

Check warning on line 470 in schema_salad/avro/schema.py

View check run for this annotation

Codecov / codecov/patch

schema_salad/avro/schema.py#L470

Added line #L470 was not covered by tests


class MapSchema(Schema):
"""Avro map schema class."""

Expand Down Expand Up @@ -740,6 +781,11 @@
if atype in VALID_TYPES:
if atype == "array":
items = json_data.get("items")
if "name" in json_data and json_data["name"]:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checking that json_data["name"] is neither None nor an empty string is required; otherwise the metaschema fails to load (for some reason the ArraySchemas there try to load with empty name strings).

name = json_data["name"]
namespace = json_data.get("namespace", names.default_namespace)
doc = json_data.get("doc")
return NamedArraySchema(items, names, name, namespace, doc, other_props)

Check warning on line 788 in schema_salad/avro/schema.py

View check run for this annotation

Codecov / codecov/patch

schema_salad/avro/schema.py#L785-L788

Added lines #L785 - L788 were not covered by tests
return ArraySchema(items, names, other_props)
elif atype == "map":
values = json_data.get("values")
Expand All @@ -748,8 +794,7 @@
namespace = json_data.get("namespace", names.default_namespace)
doc = json_data.get("doc")
return NamedMapSchema(values, names, name, namespace, doc, other_props)
else:
return MapSchema(values, names, other_props)
return MapSchema(values, names, other_props)
elif atype == "union":
schemas = json_data.get("names")
if not isinstance(schemas, list):
Expand All @@ -761,8 +806,7 @@
namespace = json_data.get("namespace", names.default_namespace)
doc = json_data.get("doc")
return NamedUnionSchema(schemas, names, name, namespace, doc)
else:
return UnionSchema(schemas, names)
return UnionSchema(schemas, names)

Check warning on line 809 in schema_salad/avro/schema.py

View check run for this annotation

Codecov / codecov/patch

schema_salad/avro/schema.py#L809

Added line #L809 was not covered by tests
if atype is None:
raise SchemaParseException(f'No "type" property: {json_data}')
raise SchemaParseException(f"Undefined type: {atype}")
Expand Down
9 changes: 6 additions & 3 deletions schema_salad/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def friendly(v: Any) -> Any:
"""Format an Avro schema into a pretty-printed representation."""
if isinstance(v, avro.schema.NamedSchema):
return avro_shortname(v.name)
if isinstance(v, avro.schema.ArraySchema):
if isinstance(v, (avro.schema.ArraySchema, avro.schema.NamedArraySchema)):
return f"array of <{friendly(v.items)}>"
if isinstance(v, (avro.schema.MapSchema, avro.schema.NamedMapSchema)):
return f"map of <{friendly(v.values)}>"
Expand Down Expand Up @@ -208,7 +208,7 @@ def validate_ex(
)
)
return False
if isinstance(expected_schema, avro.schema.ArraySchema):
if isinstance(expected_schema, (avro.schema.ArraySchema, avro.schema.NamedArraySchema)):
if isinstance(datum, MutableSequence):
for i, d in enumerate(datum):
try:
Expand Down Expand Up @@ -259,7 +259,9 @@ def validate_ex(
errors: list[SchemaSaladException] = []
checked = []
for s in expected_schema.schemas:
if isinstance(datum, MutableSequence) and not isinstance(s, avro.schema.ArraySchema):
if isinstance(datum, MutableSequence) and not isinstance(
s, (avro.schema.ArraySchema, avro.schema.NamedArraySchema)
):
continue
if isinstance(datum, MutableMapping) and not isinstance(
s, (avro.schema.RecordSchema, avro.schema.MapSchema, avro.schema.NamedMapSchema)
Expand All @@ -269,6 +271,7 @@ def validate_ex(
s,
(
avro.schema.ArraySchema,
avro.schema.NamedArraySchema,
avro.schema.RecordSchema,
avro.schema.MapSchema,
avro.schema.NamedMapSchema,
Expand Down