From 9f9b900e276942579cc70eec8e2dd246025d9e49 Mon Sep 17 00:00:00 2001 From: Vitor Hideyoshi Date: Fri, 28 Mar 2025 02:03:42 -0300 Subject: [PATCH] Better Type Parsing Interface --- jsonschema_pydantic/__init__.py | 1 - jsonschema_pydantic/jsonschema_to_pydantic.py | 138 ------------------ jsonschema_pydantic/schema_converter.py | 79 ++++++++++ jsonschema_pydantic/types/__init__.py | 10 ++ jsonschema_pydantic/types/_type_parser.py | 28 ++++ .../types/array_type_parser.py | 21 +++ .../types/boolean_type_parser.py | 11 ++ .../types/float_type_parser.py | 11 ++ jsonschema_pydantic/types/int_type_parser.py | 11 ++ .../types/object_type_parser.py | 14 ++ .../types/string_type_parser.py | 11 ++ ...conversion.py => test_schema_converter.py} | 18 +-- tests/test_type_parser.py | 53 +++++++ 13 files changed, 258 insertions(+), 148 deletions(-) delete mode 100644 jsonschema_pydantic/jsonschema_to_pydantic.py create mode 100644 jsonschema_pydantic/schema_converter.py create mode 100644 jsonschema_pydantic/types/__init__.py create mode 100644 jsonschema_pydantic/types/_type_parser.py create mode 100644 jsonschema_pydantic/types/array_type_parser.py create mode 100644 jsonschema_pydantic/types/boolean_type_parser.py create mode 100644 jsonschema_pydantic/types/float_type_parser.py create mode 100644 jsonschema_pydantic/types/int_type_parser.py create mode 100644 jsonschema_pydantic/types/object_type_parser.py create mode 100644 jsonschema_pydantic/types/string_type_parser.py rename tests/{test_conversion.py => test_schema_converter.py} (88%) create mode 100644 tests/test_type_parser.py diff --git a/jsonschema_pydantic/__init__.py b/jsonschema_pydantic/__init__.py index ab5ba24..e69de29 100644 --- a/jsonschema_pydantic/__init__.py +++ b/jsonschema_pydantic/__init__.py @@ -1 +0,0 @@ -from .jsonschema_to_pydantic import ModelSchemaBuilder diff --git a/jsonschema_pydantic/jsonschema_to_pydantic.py b/jsonschema_pydantic/jsonschema_to_pydantic.py deleted file mode 100644 index 46bb146..0000000 --- a/jsonschema_pydantic/jsonschema_to_pydantic.py +++ /dev/null @@ -1,138 +0,0 @@ -from jsonschema.exceptions import SchemaError -from jsonschema.protocols import Validator -from pydantic import create_model -from pydantic.fields import Field - -import warnings -from typing import Type - -_base_type_mappings = { - "string": str, - "number": float, - "integer": int, - "boolean": bool, - "array": ..., - "object": ..., -} - - -class ModelSchemaBuilder: - @staticmethod - def build( - schema: dict, - ): - try: - Validator.check_schema(schema) - except SchemaError as e: - raise ValueError(f"Invalid JSON Schema: {e}") - - if schema["type"] != "object": - raise TypeError( - f"Invalid JSON Schema: {schema['type']}. Only 'object' can be converted to Pydantic models." - ) - - return ModelSchemaBuilder._build_model_from_properties( - schema["title"], schema["properties"], schema.get("required", []) - ) - - @staticmethod - def _parse_properties( - properties: dict, required_keys=None - ) -> dict[str, tuple[type, Field]]: - required_keys = required_keys or [] - - fields = {} - for name, prop in properties.items(): - fields[name] = ModelSchemaBuilder._build_field(name, prop, required_keys) - - return fields - - @staticmethod - def _build_field( - name, properties: dict, required_keys: list[str] - ) -> tuple[type, Field]: - _field_type = None - _field_args = {} - - match properties["type"]: - case "object": - _field_type, _field_args = ModelSchemaBuilder._build_field_object( - name, properties - ) - case "array": - _field_type, _field_args = ModelSchemaBuilder._build_field_array( - name, properties - ) - case "string": - _field_type, _field_args = ModelSchemaBuilder._build_field_string( - properties - ) - case "boolean": - _field_type, _field_args = ModelSchemaBuilder._build_field_boolean( - properties - ) - case "integer": - _field_type, _field_args = ModelSchemaBuilder._build_field_int( - properties - ) - case "number": - _field_type, _field_args = ModelSchemaBuilder._build_field_float( - properties - ) - case _: - raise ValueError(f"Unsupported type: {properties['type']}") - - if description := properties.get("description"): - _field_args["description"] = description - else: - warnings.warn( - f"Property {name} is missing a description. We highly recommend adding one." - ) - - _default_value = ... if name in required_keys else None - return _field_type, Field(_default_value, *_field_args) - - @staticmethod - def _build_field_object(name, properties: dict) -> tuple[type, dict[str, any]]: - _field_type = ModelSchemaBuilder._build_model_from_properties( - name, properties["properties"], properties.get("required", []) - ) - return _field_type, {} - - @staticmethod - def _build_field_array(name, properties: dict) -> tuple[type, dict[str, any]]: - _item_type = properties["items"]["type"] - if _item_type == "object": - _item_type = ModelSchemaBuilder._build_model_from_properties( - name, properties["items"]["properties"] - ) - else: - _item_type = _base_type_mappings[_item_type] - - return list[_item_type], {} - - @staticmethod - def _build_field_string(properties: dict) -> tuple[type, dict[str, any]]: - return str, {} - - @staticmethod - def _build_field_boolean(properties: dict) -> tuple[type, dict[str, any]]: - return bool, {} - - @staticmethod - def _build_field_int(properties: dict) -> tuple[type, dict[str, any]]: - return int, {} - - @staticmethod - def _build_field_float(properties: dict) -> tuple[type, dict[str, any]]: - return float, {} - - @staticmethod - def _build_model_from_properties( - model_name: str, model_properties: dict, required_keys: list[str] - ) -> Type: - properties = ModelSchemaBuilder._parse_properties( - model_properties, required_keys - ) - - return create_model(model_name, **properties) diff --git a/jsonschema_pydantic/schema_converter.py b/jsonschema_pydantic/schema_converter.py new file mode 100644 index 0000000..ec048e0 --- /dev/null +++ b/jsonschema_pydantic/schema_converter.py @@ -0,0 +1,79 @@ +from jsonschema_pydantic.types import GenericTypeParser + +from jsonschema.exceptions import SchemaError +from jsonschema.protocols import Validator +from pydantic import create_model +from pydantic.fields import Field + +import warnings +from typing import Type + + +class SchemaConverter: + @staticmethod + def build(schema): + try: + Validator.check_schema(schema) + except SchemaError as e: + raise ValueError(f"Invalid JSON Schema: {e}") + + if schema["type"] != "object": + raise TypeError( + f"Invalid JSON Schema: {schema['type']}. Only 'object' can be converted to Pydantic models." + ) + + return SchemaConverter.build_object(schema["title"], schema) + + @staticmethod + def build_object( + name: str, + schema: dict, + ): + if schema["type"] != "object": + raise TypeError( + f"Invalid JSON Schema: {schema['type']}. Only 'object' can be converted to Pydantic models." + ) + + return SchemaConverter._build_model_from_properties( + name, schema["properties"], schema.get("required", []) + ) + + @staticmethod + def _build_model_from_properties( + model_name: str, model_properties: dict, required_keys: list[str] + ) -> Type: + properties = SchemaConverter._parse_properties(model_properties, required_keys) + + return create_model(model_name, **properties) + + @staticmethod + def _parse_properties( + properties: dict, required_keys=None + ) -> dict[str, tuple[type, Field]]: + required_keys = required_keys or [] + + fields = {} + for name, prop in properties.items(): + fields[name] = SchemaConverter._build_field(name, prop, required_keys) + + return fields + + @staticmethod + def _build_field( + name, properties: dict, required_keys: list[str] + ) -> tuple[type, Field]: + _field_type, _field_args = GenericTypeParser.get_impl( + properties["type"] + ).from_properties(name, properties) + + _field_args = _field_args or {} + + if description := properties.get("description"): + _field_args["description"] = description + else: + warnings.warn( + f"Property {name} is missing a description. We highly recommend adding one." + ) + + _default_value = ... if name in required_keys else None + return _field_type, Field(_default_value, **_field_args) diff --git a/jsonschema_pydantic/types/__init__.py b/jsonschema_pydantic/types/__init__.py new file mode 100644 index 0000000..99de370 --- /dev/null +++ b/jsonschema_pydantic/types/__init__.py @@ -0,0 +1,10 @@ +# Exports generic type parser +from ._type_parser import GenericTypeParser + +# Exports Implementations +from .int_type_parser import IntTypeParser # isort:skip +from .object_type_parser import ObjectTypeParser # isort:skip +from .string_type_parser import StringTypeParser # isort:skip +from .array_type_parser import ArrayTypeParser # isort:skip +from .boolean_type_parser import BooleanTypeParser # isort:skip +from .float_type_parser import FloatTypeParser # isort:skip diff --git a/jsonschema_pydantic/types/_type_parser.py b/jsonschema_pydantic/types/_type_parser.py new file mode 100644 index 0000000..2a76d01 --- /dev/null +++ b/jsonschema_pydantic/types/_type_parser.py @@ -0,0 +1,28 @@ +from abc import ABC, abstractmethod +from typing import Generic, Self, TypeVar + +T = TypeVar("T") + + +class GenericTypeParser(ABC, Generic[T]): + @property + @abstractmethod + def mapped_type(self) -> type[T]: ... + + @property + @abstractmethod + def json_schema_type(self) -> str: ... + + @staticmethod + @abstractmethod + def from_properties( + name: str, properties: dict[str, any] + ) -> tuple[type[T], dict[str, any]]: ... + + @classmethod + def get_impl(cls, type_name: str) -> Self: + for subcls in cls.__subclasses__(): + if subcls.json_schema_type == type_name: + return subcls + + raise ValueError(f"Unknown type: {type_name}") diff --git a/jsonschema_pydantic/types/array_type_parser.py b/jsonschema_pydantic/types/array_type_parser.py new file mode 100644 index 0000000..3733ba2 --- /dev/null +++ b/jsonschema_pydantic/types/array_type_parser.py @@ -0,0 +1,21 @@ +from jsonschema_pydantic.types._type_parser import GenericTypeParser + +from typing import TypeVar + +V = TypeVar("V") + + +class ArrayTypeParser(GenericTypeParser): + mapped_type = list + + json_schema_type = "array" + + @classmethod + def from_properties(cls, name, properties): + _item_type = properties["items"]["type"] + if _item_type == "object": + _item_type = type + else: + _item_type = GenericTypeParser.get_impl(_item_type).mapped_type + + return list[_item_type], {} diff --git a/jsonschema_pydantic/types/boolean_type_parser.py b/jsonschema_pydantic/types/boolean_type_parser.py new file mode 100644 index 0000000..5e69c39 --- /dev/null +++ b/jsonschema_pydantic/types/boolean_type_parser.py @@ -0,0 +1,11 @@ +from jsonschema_pydantic.types._type_parser import GenericTypeParser + + +class BooleanTypeParser(GenericTypeParser): + mapped_type = bool + + json_schema_type = "boolean" + + @staticmethod + def from_properties(name, properties): + return bool, {} diff --git a/jsonschema_pydantic/types/float_type_parser.py b/jsonschema_pydantic/types/float_type_parser.py new file mode 100644 index 0000000..950e7d1 --- /dev/null +++ b/jsonschema_pydantic/types/float_type_parser.py @@ -0,0 +1,11 @@ +from jsonschema_pydantic.types._type_parser import GenericTypeParser + + +class FloatTypeParser(GenericTypeParser): + mapped_type = float + + json_schema_type = "number" + + @staticmethod + def from_properties(name, properties): + return float, {} diff --git a/jsonschema_pydantic/types/int_type_parser.py b/jsonschema_pydantic/types/int_type_parser.py new file mode 100644 index 0000000..ac5bb6d --- /dev/null +++ b/jsonschema_pydantic/types/int_type_parser.py @@ -0,0 +1,11 @@ +from jsonschema_pydantic.types._type_parser import GenericTypeParser + + +class IntTypeParser(GenericTypeParser): + mapped_type = int + + json_schema_type = "integer" + + @staticmethod + def from_properties(name, properties): + return int, {} diff --git a/jsonschema_pydantic/types/object_type_parser.py b/jsonschema_pydantic/types/object_type_parser.py new file mode 100644 index 0000000..e6b8296 --- /dev/null +++ b/jsonschema_pydantic/types/object_type_parser.py @@ -0,0 +1,14 @@ +from jsonschema_pydantic.types._type_parser import GenericTypeParser + + +class ObjectTypeParser(GenericTypeParser): + mapped_type = object + + json_schema_type = "object" + + @staticmethod + def from_properties(name, properties): + from jsonschema_pydantic.schema_converter import SchemaConverter + + _type = SchemaConverter.build_object(name, properties) + return _type, {} diff --git a/jsonschema_pydantic/types/string_type_parser.py b/jsonschema_pydantic/types/string_type_parser.py new file mode 100644 index 0000000..4fb3745 --- /dev/null +++ b/jsonschema_pydantic/types/string_type_parser.py @@ -0,0 +1,11 @@ +from jsonschema_pydantic.types._type_parser import GenericTypeParser + + +class StringTypeParser(GenericTypeParser): + mapped_type = str + + json_schema_type = "string" + + @staticmethod + def from_properties(name, properties): + return str, {} diff --git a/tests/test_conversion.py b/tests/test_schema_converter.py similarity index 88% rename from tests/test_conversion.py rename to tests/test_schema_converter.py index 5eaf4cf..0c7dfd2 100644 --- a/tests/test_conversion.py +++ b/tests/test_schema_converter.py @@ -1,4 +1,4 @@ -from jsonschema_pydantic import ModelSchemaBuilder +from jsonschema_pydantic.schema_converter import SchemaConverter from pydantic import BaseModel @@ -9,7 +9,7 @@ def is_pydantic_model(cls): return isinstance(cls, type) and issubclass(cls, BaseModel) -class TestConversion(TestCase): +class TestSchemaConverter(TestCase): def test_jsonschema_to_pydantic(self): schema = { "title": "Person", @@ -22,7 +22,7 @@ class TestConversion(TestCase): "required": ["name"], } - model = ModelSchemaBuilder.build(schema) + model = SchemaConverter.build(schema) self.assertTrue(is_pydantic_model(model)) @@ -37,7 +37,7 @@ class TestConversion(TestCase): "required": ["name"], } - model = ModelSchemaBuilder.build(schema) + model = SchemaConverter.build(schema) self.assertEqual(model(name="John", age=30).name, "John") @@ -52,7 +52,7 @@ class TestConversion(TestCase): "required": ["age"], } - model = ModelSchemaBuilder.build(schema) + model = SchemaConverter.build(schema) self.assertEqual(model(age=30).age, 30) @@ -69,7 +69,7 @@ class TestConversion(TestCase): "required": ["age"], } - model = ModelSchemaBuilder.build(schema) + model = SchemaConverter.build(schema) self.assertEqual(model(age=30).age, 30.0) @@ -86,7 +86,7 @@ class TestConversion(TestCase): "required": ["is_active"], } - model = ModelSchemaBuilder.build(schema) + model = SchemaConverter.build(schema) self.assertEqual(model(is_active=True).is_active, True) @@ -103,7 +103,7 @@ class TestConversion(TestCase): "required": ["friends"], } - model = ModelSchemaBuilder.build(schema) + model = SchemaConverter.build(schema) self.assertEqual(model(friends=["John", "Jane"]).friends, ["John", "Jane"]) @@ -125,7 +125,7 @@ class TestConversion(TestCase): "required": ["address"], } - model = ModelSchemaBuilder.build(schema) + model = SchemaConverter.build(schema) obj = model(address={"street": "123 Main St", "city": "Springfield"}) diff --git a/tests/test_type_parser.py b/tests/test_type_parser.py new file mode 100644 index 0000000..9898346 --- /dev/null +++ b/tests/test_type_parser.py @@ -0,0 +1,53 @@ +from jsonschema_pydantic.types import ( + ArrayTypeParser, + FloatTypeParser, + GenericTypeParser, + IntTypeParser, + ObjectTypeParser, + StringTypeParser, +) + +import unittest + + +class TestTypeParser(unittest.TestCase): + def test_get_impl(self): + self.assertEqual(GenericTypeParser.get_impl("integer"), IntTypeParser) + self.assertEqual(GenericTypeParser.get_impl("string"), StringTypeParser) + self.assertEqual(GenericTypeParser.get_impl("number"), FloatTypeParser) + self.assertEqual(GenericTypeParser.get_impl("object"), ObjectTypeParser) + self.assertEqual(GenericTypeParser.get_impl("array"), ArrayTypeParser) + + def test_int_parser(self): + parser = IntTypeParser() + expected_definition = (int, {}) + + self.assertEqual(parser.from_properties("placeholder", {}), expected_definition) + + def test_float_parser(self): + parser = FloatTypeParser() + expected_definition = (float, {}) + + self.assertEqual(parser.from_properties("placeholder", {}), expected_definition) + + def test_string_parser(self): + parser = StringTypeParser() + expected_definition = (str, {}) + + self.assertEqual(parser.from_properties("placeholder", {}), expected_definition) + + def test_object_parser(self): + parser = ObjectTypeParser() + expected_definition = (object, {}) + + self.assertEqual(parser.from_properties("placeholder", {}), expected_definition) + + def test_array_parser(self): + parser = ArrayTypeParser() + expected_definition = (list[str], {}) + + properties = {"items": {"type": "string"}} + + self.assertEqual( + parser.from_properties("placeholder", properties), expected_definition + )