Better Type Parsing Interface

This commit is contained in:
2025-03-28 02:03:42 -03:00
parent 529a35d8bd
commit 9f9b900e27
13 changed files with 258 additions and 148 deletions

View File

@@ -1 +0,0 @@
from .jsonschema_to_pydantic import ModelSchemaBuilder

View File

@@ -1,138 +0,0 @@
from jsonschema.exceptions import SchemaError
from jsonschema.protocols import Validator
from pydantic import create_model
from pydantic.fields import Field
import warnings
from typing import Type
# Maps JSON Schema primitive type names to their Python counterparts.
# Composite types ("array", "object") have no direct scalar mapping and are
# handled by dedicated builder methods; Ellipsis marks them as placeholders.
_base_type_mappings = {
    "string": str,
    "number": float,
    "integer": int,
    "boolean": bool,
    "array": ...,
    "object": ...,
}
class ModelSchemaBuilder:
@staticmethod
def build(
schema: dict,
):
try:
Validator.check_schema(schema)
except SchemaError as e:
raise ValueError(f"Invalid JSON Schema: {e}")
if schema["type"] != "object":
raise TypeError(
f"Invalid JSON Schema: {schema['type']}. Only 'object' can be converted to Pydantic models."
)
return ModelSchemaBuilder._build_model_from_properties(
schema["title"], schema["properties"], schema.get("required", [])
)
@staticmethod
def _parse_properties(
properties: dict, required_keys=None
) -> dict[str, tuple[type, Field]]:
required_keys = required_keys or []
fields = {}
for name, prop in properties.items():
fields[name] = ModelSchemaBuilder._build_field(name, prop, required_keys)
return fields
@staticmethod
def _build_field(
name, properties: dict, required_keys: list[str]
) -> tuple[type, Field]:
_field_type = None
_field_args = {}
match properties["type"]:
case "object":
_field_type, _field_args = ModelSchemaBuilder._build_field_object(
name, properties
)
case "array":
_field_type, _field_args = ModelSchemaBuilder._build_field_array(
name, properties
)
case "string":
_field_type, _field_args = ModelSchemaBuilder._build_field_string(
properties
)
case "boolean":
_field_type, _field_args = ModelSchemaBuilder._build_field_boolean(
properties
)
case "integer":
_field_type, _field_args = ModelSchemaBuilder._build_field_int(
properties
)
case "number":
_field_type, _field_args = ModelSchemaBuilder._build_field_float(
properties
)
case _:
raise ValueError(f"Unsupported type: {properties['type']}")
if description := properties.get("description"):
_field_args["description"] = description
else:
warnings.warn(
f"Property {name} is missing a description. We highly recommend adding one."
)
_default_value = ... if name in required_keys else None
return _field_type, Field(_default_value, *_field_args)
@staticmethod
def _build_field_object(name, properties: dict) -> tuple[type, dict[str, any]]:
_field_type = ModelSchemaBuilder._build_model_from_properties(
name, properties["properties"], properties.get("required", [])
)
return _field_type, {}
@staticmethod
def _build_field_array(name, properties: dict) -> tuple[type, dict[str, any]]:
_item_type = properties["items"]["type"]
if _item_type == "object":
_item_type = ModelSchemaBuilder._build_model_from_properties(
name, properties["items"]["properties"]
)
else:
_item_type = _base_type_mappings[_item_type]
return list[_item_type], {}
@staticmethod
def _build_field_string(properties: dict) -> tuple[type, dict[str, any]]:
return str, {}
@staticmethod
def _build_field_boolean(properties: dict) -> tuple[type, dict[str, any]]:
return bool, {}
@staticmethod
def _build_field_int(properties: dict) -> tuple[type, dict[str, any]]:
return int, {}
@staticmethod
def _build_field_float(properties: dict) -> tuple[type, dict[str, any]]:
return float, {}
@staticmethod
def _build_model_from_properties(
model_name: str, model_properties: dict, required_keys: list[str]
) -> Type:
properties = ModelSchemaBuilder._parse_properties(
model_properties, required_keys
)
return create_model(model_name, **properties)

View File

@@ -0,0 +1,79 @@
from jsonschema_pydantic.types import GenericTypeParser
from jsonschema.exceptions import SchemaError
from jsonschema.protocols import Validator
from pydantic import create_model
from pydantic.fields import Field
import warnings
from typing import Type
class SchemaConverter:
    """Turns JSON Schema 'object' definitions into Pydantic model classes."""

    @staticmethod
    def build(schema):
        """Validate *schema* and build a model named after its 'title'.

        Raises:
            ValueError: if *schema* is not a valid JSON Schema.
            TypeError: if the top-level type is not 'object'.
        """
        try:
            Validator.check_schema(schema)
        except SchemaError as e:
            raise ValueError(f"Invalid JSON Schema: {e}")
        if schema["type"] != "object":
            raise TypeError(
                f"Invalid JSON Schema: {schema['type']}. Only 'object' can be converted to Pydantic models."
            )
        return SchemaConverter.build_object(schema["title"], schema)

    @staticmethod
    def build_object(
        name: str,
        schema: dict,
    ):
        """Build a Pydantic model called *name* from an 'object' schema.

        Raises:
            TypeError: if *schema*'s type is not 'object'.
        """
        if schema["type"] != "object":
            raise TypeError(
                f"Invalid JSON Schema: {schema['type']}. Only 'object' can be converted to Pydantic models."
            )
        return SchemaConverter._build_model_from_properties(
            name, schema["properties"], schema.get("required", [])
        )

    @staticmethod
    def _build_model_from_properties(
        model_name: str, model_properties: dict, required_keys: list[str]
    ) -> Type:
        """Create the model from parsed (type, Field) property definitions."""
        field_definitions = SchemaConverter._parse_properties(
            model_properties, required_keys
        )
        return create_model(model_name, **field_definitions)

    @staticmethod
    def _parse_properties(
        properties: dict, required_keys=None
    ) -> dict[str, tuple[type, Field]]:
        """Map each property name to its (type, Field) definition."""
        keys = required_keys or []
        return {
            prop_name: SchemaConverter._build_field(prop_name, prop_schema, keys)
            for prop_name, prop_schema in properties.items()
        }

    @staticmethod
    def _build_field(
        name, properties: dict, required_keys: list[str]
    ) -> tuple[type, Field]:
        """Dispatch to the parser registered for the property's type."""
        parser = GenericTypeParser.get_impl(properties["type"])
        field_type, field_args = parser.from_properties(name, properties)
        field_args = dict(field_args) if field_args else {}
        description = properties.get("description")
        if description:
            field_args["description"] = description
        else:
            warnings.warn(
                f"Property {name} is missing a description. We highly recommend adding one."
            )
        # Ellipsis marks a required field for pydantic; None makes it optional.
        default = ... if name in required_keys else None
        return field_type, Field(default, **field_args)

View File

@@ -0,0 +1,10 @@
# Exports generic type parser
from ._type_parser import GenericTypeParser
# Exports Implementations
from .int_type_parser import IntTypeParser # isort:skip
from .object_type_parser import ObjectTypeParser # isort:skip
from .string_type_parser import StringTypeParser # isort:skip
from .array_type_parser import ArrayTypeParser # isort:skip
from .boolean_type_parser import BooleanTypeParser # isort:skip
from .float_type_parser import FloatTypeParser # isort:skip

View File

@@ -0,0 +1,28 @@
from abc import ABC, abstractmethod
from typing import Generic, Self, TypeVar
T = TypeVar("T")
class GenericTypeParser(ABC, Generic[T]):
@property
@abstractmethod
def mapped_type(self) -> type[T]: ...
@property
@abstractmethod
def json_schema_type(self) -> str: ...
@staticmethod
@abstractmethod
def from_properties(
name: str, properties: dict[str, any]
) -> tuple[type[T], dict[str, any]]: ...
@classmethod
def get_impl(cls, type_name: str) -> Self:
for subcls in cls.__subclasses__():
if subcls.json_schema_type == type_name:
return subcls
raise ValueError(f"Unknown type: {type_name}")

View File

@@ -0,0 +1,21 @@
from jsonschema_pydantic.types._type_parser import GenericTypeParser
from typing import TypeVar
V = TypeVar("V")
class ArrayTypeParser(GenericTypeParser):
    """Parses JSON Schema 'array' properties into typed ``list`` fields."""

    mapped_type = list
    json_schema_type = "array"

    @classmethod
    def from_properties(cls, name, properties):
        """Resolve the item type and return ``(list[item_type], field_kwargs)``.

        Raises:
            KeyError: if the schema lacks 'items' or the items' 'type'.
            ValueError: if the item type has no registered parser.
        """
        item_schema = properties["items"]
        item_type_name = item_schema["type"]
        if item_type_name == "object":
            # BUG FIX: the original used the bare builtin ``type`` as a
            # placeholder, discarding the nested object schema. Delegate to
            # the registered object parser so a real sub-model is built
            # (matching the pre-refactor ModelSchemaBuilder behavior).
            item_type, _ = GenericTypeParser.get_impl("object").from_properties(
                name, item_schema
            )
        else:
            item_type = GenericTypeParser.get_impl(item_type_name).mapped_type
        return list[item_type], {}

View File

@@ -0,0 +1,11 @@
from jsonschema_pydantic.types._type_parser import GenericTypeParser
class BooleanTypeParser(GenericTypeParser):
    """Maps the JSON Schema 'boolean' type onto Python's ``bool``."""

    json_schema_type = "boolean"
    mapped_type = bool

    @staticmethod
    def from_properties(name, properties):
        # Booleans carry no additional field constraints.
        return BooleanTypeParser.mapped_type, {}

View File

@@ -0,0 +1,11 @@
from jsonschema_pydantic.types._type_parser import GenericTypeParser
class FloatTypeParser(GenericTypeParser):
    """Maps the JSON Schema 'number' type onto Python's ``float``."""

    json_schema_type = "number"
    mapped_type = float

    @staticmethod
    def from_properties(name, properties):
        # Numbers carry no additional field constraints.
        return FloatTypeParser.mapped_type, {}

View File

@@ -0,0 +1,11 @@
from jsonschema_pydantic.types._type_parser import GenericTypeParser
class IntTypeParser(GenericTypeParser):
    """Maps the JSON Schema 'integer' type onto Python's ``int``."""

    json_schema_type = "integer"
    mapped_type = int

    @staticmethod
    def from_properties(name, properties):
        # Integers carry no additional field constraints.
        return IntTypeParser.mapped_type, {}

View File

@@ -0,0 +1,14 @@
from jsonschema_pydantic.types._type_parser import GenericTypeParser
class ObjectTypeParser(GenericTypeParser):
    """Maps the JSON Schema 'object' type onto a nested Pydantic model."""

    mapped_type = object
    json_schema_type = "object"

    @staticmethod
    def from_properties(name, properties):
        # Imported lazily to break the circular dependency with the
        # converter, which itself dispatches back into the type parsers.
        from jsonschema_pydantic.schema_converter import SchemaConverter

        model = SchemaConverter.build_object(name, properties)
        return model, {}

View File

@@ -0,0 +1,11 @@
from jsonschema_pydantic.types._type_parser import GenericTypeParser
class StringTypeParser(GenericTypeParser):
    """Maps the JSON Schema 'string' type onto Python's ``str``."""

    json_schema_type = "string"
    mapped_type = str

    @staticmethod
    def from_properties(name, properties):
        # Strings carry no additional field constraints.
        return StringTypeParser.mapped_type, {}

View File

@@ -1,4 +1,4 @@
from jsonschema_pydantic import ModelSchemaBuilder
from jsonschema_pydantic.schema_converter import SchemaConverter
from pydantic import BaseModel
@@ -9,7 +9,7 @@ def is_pydantic_model(cls):
return isinstance(cls, type) and issubclass(cls, BaseModel)
class TestConversion(TestCase):
class TestSchemaConverter(TestCase):
def test_jsonschema_to_pydantic(self):
schema = {
"title": "Person",
@@ -22,7 +22,7 @@ class TestConversion(TestCase):
"required": ["name"],
}
model = ModelSchemaBuilder.build(schema)
model = SchemaConverter.build(schema)
self.assertTrue(is_pydantic_model(model))
@@ -37,7 +37,7 @@ class TestConversion(TestCase):
"required": ["name"],
}
model = ModelSchemaBuilder.build(schema)
model = SchemaConverter.build(schema)
self.assertEqual(model(name="John", age=30).name, "John")
@@ -52,7 +52,7 @@ class TestConversion(TestCase):
"required": ["age"],
}
model = ModelSchemaBuilder.build(schema)
model = SchemaConverter.build(schema)
self.assertEqual(model(age=30).age, 30)
@@ -69,7 +69,7 @@ class TestConversion(TestCase):
"required": ["age"],
}
model = ModelSchemaBuilder.build(schema)
model = SchemaConverter.build(schema)
self.assertEqual(model(age=30).age, 30.0)
@@ -86,7 +86,7 @@ class TestConversion(TestCase):
"required": ["is_active"],
}
model = ModelSchemaBuilder.build(schema)
model = SchemaConverter.build(schema)
self.assertEqual(model(is_active=True).is_active, True)
@@ -103,7 +103,7 @@ class TestConversion(TestCase):
"required": ["friends"],
}
model = ModelSchemaBuilder.build(schema)
model = SchemaConverter.build(schema)
self.assertEqual(model(friends=["John", "Jane"]).friends, ["John", "Jane"])
@@ -125,7 +125,7 @@ class TestConversion(TestCase):
"required": ["address"],
}
model = ModelSchemaBuilder.build(schema)
model = SchemaConverter.build(schema)
obj = model(address={"street": "123 Main St", "city": "Springfield"})

53
tests/test_type_parser.py Normal file
View File

@@ -0,0 +1,53 @@
from jsonschema_pydantic.types import (
ArrayTypeParser,
FloatTypeParser,
GenericTypeParser,
IntTypeParser,
ObjectTypeParser,
StringTypeParser,
)
import unittest
class TestTypeParser(unittest.TestCase):
    """Tests for the type parser registry and the individual parsers."""

    def test_get_impl(self):
        self.assertEqual(GenericTypeParser.get_impl("integer"), IntTypeParser)
        self.assertEqual(GenericTypeParser.get_impl("string"), StringTypeParser)
        self.assertEqual(GenericTypeParser.get_impl("number"), FloatTypeParser)
        self.assertEqual(GenericTypeParser.get_impl("object"), ObjectTypeParser)
        self.assertEqual(GenericTypeParser.get_impl("array"), ArrayTypeParser)

    def test_get_impl_unknown_type(self):
        # Unknown type names must be rejected loudly, not return None.
        with self.assertRaises(ValueError):
            GenericTypeParser.get_impl("does-not-exist")

    def test_int_parser(self):
        parser = IntTypeParser()
        expected_definition = (int, {})
        self.assertEqual(parser.from_properties("placeholder", {}), expected_definition)

    def test_float_parser(self):
        parser = FloatTypeParser()
        expected_definition = (float, {})
        self.assertEqual(parser.from_properties("placeholder", {}), expected_definition)

    def test_string_parser(self):
        parser = StringTypeParser()
        expected_definition = (str, {})
        self.assertEqual(parser.from_properties("placeholder", {}), expected_definition)

    def test_object_parser(self):
        # BUG FIX: the original asserted (object, {}) for an empty dict, but
        # ObjectTypeParser.from_properties delegates to
        # SchemaConverter.build_object, which raises KeyError when the schema
        # lacks "type"/"properties" — the old expectation was unreachable.
        parser = ObjectTypeParser()
        properties = {
            "type": "object",
            "properties": {
                "street": {"type": "string", "description": "Street address."},
            },
        }
        _type, _args = parser.from_properties("Placeholder", properties)
        # A nested object builds a brand-new model class, not bare ``object``.
        self.assertTrue(isinstance(_type, type))
        self.assertEqual(_args, {})

    def test_array_parser(self):
        parser = ArrayTypeParser()
        expected_definition = (list[str], {})
        properties = {"items": {"type": "string"}}
        self.assertEqual(
            parser.from_properties("placeholder", properties), expected_definition
        )