Better Type Parsing Interface

This commit is contained in:
2025-03-28 02:03:42 -03:00
parent 529a35d8bd
commit 9f9b900e27
13 changed files with 258 additions and 148 deletions

View File

@@ -1 +0,0 @@
from .jsonschema_to_pydantic import ModelSchemaBuilder

View File

@@ -1,138 +0,0 @@
import warnings
from typing import Any, Type

from jsonschema.exceptions import SchemaError
from jsonschema.protocols import Validator
from pydantic import create_model
from pydantic.fields import Field
# Maps JSON Schema primitive type names to their Python equivalents.
# "array" and "object" are Ellipsis placeholders: they require recursive
# handling and are resolved by dedicated builder methods, never looked up
# directly from this table.
_base_type_mappings = {
    "string": str,
    "number": float,
    "integer": int,
    "boolean": bool,
    "array": ...,
    "object": ...,
}
class ModelSchemaBuilder:
@staticmethod
def build(
schema: dict,
):
try:
Validator.check_schema(schema)
except SchemaError as e:
raise ValueError(f"Invalid JSON Schema: {e}")
if schema["type"] != "object":
raise TypeError(
f"Invalid JSON Schema: {schema['type']}. Only 'object' can be converted to Pydantic models."
)
return ModelSchemaBuilder._build_model_from_properties(
schema["title"], schema["properties"], schema.get("required", [])
)
@staticmethod
def _parse_properties(
properties: dict, required_keys=None
) -> dict[str, tuple[type, Field]]:
required_keys = required_keys or []
fields = {}
for name, prop in properties.items():
fields[name] = ModelSchemaBuilder._build_field(name, prop, required_keys)
return fields
@staticmethod
def _build_field(
name, properties: dict, required_keys: list[str]
) -> tuple[type, Field]:
_field_type = None
_field_args = {}
match properties["type"]:
case "object":
_field_type, _field_args = ModelSchemaBuilder._build_field_object(
name, properties
)
case "array":
_field_type, _field_args = ModelSchemaBuilder._build_field_array(
name, properties
)
case "string":
_field_type, _field_args = ModelSchemaBuilder._build_field_string(
properties
)
case "boolean":
_field_type, _field_args = ModelSchemaBuilder._build_field_boolean(
properties
)
case "integer":
_field_type, _field_args = ModelSchemaBuilder._build_field_int(
properties
)
case "number":
_field_type, _field_args = ModelSchemaBuilder._build_field_float(
properties
)
case _:
raise ValueError(f"Unsupported type: {properties['type']}")
if description := properties.get("description"):
_field_args["description"] = description
else:
warnings.warn(
f"Property {name} is missing a description. We highly recommend adding one."
)
_default_value = ... if name in required_keys else None
return _field_type, Field(_default_value, *_field_args)
@staticmethod
def _build_field_object(name, properties: dict) -> tuple[type, dict[str, any]]:
_field_type = ModelSchemaBuilder._build_model_from_properties(
name, properties["properties"], properties.get("required", [])
)
return _field_type, {}
@staticmethod
def _build_field_array(name, properties: dict) -> tuple[type, dict[str, any]]:
_item_type = properties["items"]["type"]
if _item_type == "object":
_item_type = ModelSchemaBuilder._build_model_from_properties(
name, properties["items"]["properties"]
)
else:
_item_type = _base_type_mappings[_item_type]
return list[_item_type], {}
@staticmethod
def _build_field_string(properties: dict) -> tuple[type, dict[str, any]]:
return str, {}
@staticmethod
def _build_field_boolean(properties: dict) -> tuple[type, dict[str, any]]:
return bool, {}
@staticmethod
def _build_field_int(properties: dict) -> tuple[type, dict[str, any]]:
return int, {}
@staticmethod
def _build_field_float(properties: dict) -> tuple[type, dict[str, any]]:
return float, {}
@staticmethod
def _build_model_from_properties(
model_name: str, model_properties: dict, required_keys: list[str]
) -> Type:
properties = ModelSchemaBuilder._parse_properties(
model_properties, required_keys
)
return create_model(model_name, **properties)

View File

@@ -0,0 +1,79 @@
from jsonschema_pydantic.types import GenericTypeParser
from jsonschema.exceptions import SchemaError
from jsonschema.protocols import Validator
from pydantic import create_model
from pydantic.fields import Field
import warnings
from typing import Type
class SchemaConverter:
    """Converts JSON Schema 'object' definitions into Pydantic model classes."""

    @staticmethod
    def build(schema):
        """Validate *schema* and turn its top-level object into a model.

        Raises ValueError for malformed schemas and TypeError when the
        top-level type is not 'object'.
        """
        try:
            Validator.check_schema(schema)
        except SchemaError as e:
            raise ValueError(f"Invalid JSON Schema: {e}")
        if schema["type"] != "object":
            raise TypeError(
                f"Invalid JSON Schema: {schema['type']}. Only 'object' can be converted to Pydantic models."
            )
        return SchemaConverter.build_object(schema["title"], schema)

    @staticmethod
    def build_object(
        name: str,
        schema: dict,
    ):
        """Convert one 'object' sub-schema into a model class named *name*."""
        if schema["type"] != "object":
            raise TypeError(
                f"Invalid JSON Schema: {schema['type']}. Only 'object' can be converted to Pydantic models."
            )
        return SchemaConverter._build_model_from_properties(
            name, schema["properties"], schema.get("required", [])
        )

    @staticmethod
    def _build_model_from_properties(
        model_name: str, model_properties: dict, required_keys: list[str]
    ) -> Type:
        """Assemble a pydantic model from parsed property definitions."""
        field_defs = SchemaConverter._parse_properties(model_properties, required_keys)
        return create_model(model_name, **field_defs)

    @staticmethod
    def _parse_properties(
        properties: dict, required_keys=None
    ) -> dict[str, tuple[type, Field]]:
        """Build a name -> (type, Field) mapping covering every property."""
        keys = required_keys or []
        return {
            prop_name: SchemaConverter._build_field(prop_name, prop_schema, keys)
            for prop_name, prop_schema in properties.items()
        }

    @staticmethod
    def _build_field(
        name, properties: dict, required_keys: list[str]
    ) -> tuple[type, Field]:
        """Look up the property's type parser and produce its field definition.

        Warns (but still succeeds) when the property has no description.
        """
        parser = GenericTypeParser.get_impl(properties["type"])
        field_type, field_kwargs = parser.from_properties(name, properties)
        field_kwargs = field_kwargs or {}
        if desc := properties.get("description"):
            field_kwargs["description"] = desc
        else:
            warnings.warn(
                f"Property {name} is missing a description. We highly recommend adding one."
            )
        # Ellipsis marks the field as required for pydantic; optional fields
        # default to None.
        default = ... if name in required_keys else None
        return field_type, Field(default, **field_kwargs)

View File

@@ -0,0 +1,10 @@
# Exports generic type parser
from ._type_parser import GenericTypeParser
# Exports Implementations
from .int_type_parser import IntTypeParser # isort:skip
from .object_type_parser import ObjectTypeParser # isort:skip
from .string_type_parser import StringTypeParser # isort:skip
from .array_type_parser import ArrayTypeParser # isort:skip
from .boolean_type_parser import BooleanTypeParser # isort:skip
from .float_type_parser import FloatTypeParser # isort:skip

View File

@@ -0,0 +1,28 @@
from abc import ABC, abstractmethod
from typing import Generic, Self, TypeVar
T = TypeVar("T")
class GenericTypeParser(ABC, Generic[T]):
@property
@abstractmethod
def mapped_type(self) -> type[T]: ...
@property
@abstractmethod
def json_schema_type(self) -> str: ...
@staticmethod
@abstractmethod
def from_properties(
name: str, properties: dict[str, any]
) -> tuple[type[T], dict[str, any]]: ...
@classmethod
def get_impl(cls, type_name: str) -> Self:
for subcls in cls.__subclasses__():
if subcls.json_schema_type == type_name:
return subcls
raise ValueError(f"Unknown type: {type_name}")

View File

@@ -0,0 +1,21 @@
from jsonschema_pydantic.types._type_parser import GenericTypeParser
from typing import TypeVar
V = TypeVar("V")
class ArrayTypeParser(GenericTypeParser):
    """Parses JSON Schema ``array`` properties into ``list[item_type]``."""

    mapped_type = list
    json_schema_type = "array"

    @classmethod
    def from_properties(cls, name, properties):
        """Return (list[item_type], {}) for an array property.

        BUG FIX: object items were previously mapped to the bare ``type``
        builtin (yielding ``list[type]``), discarding the item schema
        entirely. They are now delegated to the registered object parser,
        which builds a nested model named after the property — matching
        the pre-refactor behavior.
        """
        item_schema = properties["items"]
        item_type_name = item_schema["type"]
        if item_type_name == "object":
            item_type, _ = GenericTypeParser.get_impl("object").from_properties(
                name, item_schema
            )
        else:
            item_type = GenericTypeParser.get_impl(item_type_name).mapped_type
        return list[item_type], {}

View File

@@ -0,0 +1,11 @@
from jsonschema_pydantic.types._type_parser import GenericTypeParser
class BooleanTypeParser(GenericTypeParser):
    """Parses JSON Schema ``boolean`` properties into Python ``bool``."""

    mapped_type = bool
    json_schema_type = "boolean"

    @staticmethod
    def from_properties(name, properties):
        """Booleans carry no extra Field constraints; map straight to bool."""
        return BooleanTypeParser.mapped_type, {}

View File

@@ -0,0 +1,11 @@
from jsonschema_pydantic.types._type_parser import GenericTypeParser
class FloatTypeParser(GenericTypeParser):
    """Parses JSON Schema ``number`` properties into Python ``float``."""

    mapped_type = float
    json_schema_type = "number"

    @staticmethod
    def from_properties(name, properties):
        """Numbers carry no extra Field constraints; map straight to float."""
        return FloatTypeParser.mapped_type, {}

View File

@@ -0,0 +1,11 @@
from jsonschema_pydantic.types._type_parser import GenericTypeParser
class IntTypeParser(GenericTypeParser):
    """Parses JSON Schema ``integer`` properties into Python ``int``."""

    mapped_type = int
    json_schema_type = "integer"

    @staticmethod
    def from_properties(name, properties):
        """Integers carry no extra Field constraints; map straight to int."""
        return IntTypeParser.mapped_type, {}

View File

@@ -0,0 +1,14 @@
from jsonschema_pydantic.types._type_parser import GenericTypeParser
class ObjectTypeParser(GenericTypeParser):
    """Parses JSON Schema ``object`` properties into nested Pydantic models."""

    mapped_type = object
    json_schema_type = "object"

    @staticmethod
    def from_properties(name, properties):
        """Build a nested model class named after the property.

        The import is deferred to call time because schema_converter itself
        imports this package (circular-import guard).
        """
        from jsonschema_pydantic.schema_converter import SchemaConverter

        model = SchemaConverter.build_object(name, properties)
        return model, {}

View File

@@ -0,0 +1,11 @@
from jsonschema_pydantic.types._type_parser import GenericTypeParser
class StringTypeParser(GenericTypeParser):
    """Parses JSON Schema ``string`` properties into Python ``str``."""

    mapped_type = str
    json_schema_type = "string"

    @staticmethod
    def from_properties(name, properties):
        """Strings carry no extra Field constraints; map straight to str."""
        return StringTypeParser.mapped_type, {}