diff --git a/README.md b/README.md index 8404244..8dba275 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,10 @@ Created to simplifying the process of dynamically generating Pydantic models for ## ✨ Features -- ✅ Convert JSON Schema into Pydantic models dynamically -- 🔒 Supports validation for strings, integers, floats, booleans, arrays, and nested objects -- ⚙️ Enforces constraints like `minLength`, `maxLength`, `pattern`, `minimum`, `maximum`, `uniqueItems`, and more -- 📦 Zero config — just pass your schema and get a model +- ✅ Convert JSON Schema into Pydantic models dynamically; +- 🔒 Supports validation for strings, integers, floats, booleans, arrays, nested objects, allOf, anyOf and ref; +- ⚙️ Enforces constraints like `minLength`, `maxLength`, `pattern`, `minimum`, `maximum`, `uniqueItems`, and more; +- 📦 Zero config — just pass your schema and get a model. --- @@ -45,7 +45,8 @@ pip install jambo ## 🚀 Usage ```python -from jambo.schema_converter import SchemaConverter +from jambo import SchemaConverter + schema = { "title": "Person", @@ -70,6 +71,9 @@ print(obj) ### Strings with constraints ```python +from jambo import SchemaConverter + + schema = { "title": "EmailExample", "type": "object", @@ -92,6 +96,9 @@ print(obj) ### Integers with bounds ```python +from jambo import SchemaConverter + + schema = { "title": "AgeExample", "type": "object", @@ -109,6 +116,9 @@ print(obj) ### Nested Objects ```python +from jambo import SchemaConverter + + schema = { "title": "NestedObjectExample", "type": "object", @@ -130,6 +140,41 @@ obj = Model(address={"street": "Main St", "city": "Gotham"}) print(obj) ``` +### References + +```python +from jambo import SchemaConverter + + +schema = { + "title": "person", + "$ref": "#/$defs/person", + "$defs": { + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "emergency_contact": { + "$ref": "#/$defs/person", + }, + }, + } + }, +} + 
+model = SchemaConverter.build(schema) + +obj = model( + name="John", + age=30, + emergency_contact=model( + name="Jane", + age=28, + ), +) +``` + --- ## 🧪 Running Tests @@ -171,8 +216,6 @@ poe create-hooks ## 📌 Roadmap / TODO - [ ] Support for `enum` and `const` -- [ ] Support for `anyOf`, `allOf`, `oneOf` -- [ ] Schema ref (`$ref`) resolution - [ ] Better error reporting for unsupported schema types --- diff --git a/jambo/parser/__init__.py b/jambo/parser/__init__.py index 86d8f56..b804339 100644 --- a/jambo/parser/__init__.py +++ b/jambo/parser/__init__.py @@ -9,6 +9,7 @@ from .boolean_type_parser import BooleanTypeParser from .float_type_parser import FloatTypeParser from .int_type_parser import IntTypeParser from .object_type_parser import ObjectTypeParser +from .ref_type_parser import RefTypeParser from .string_type_parser import StringTypeParser @@ -22,4 +23,5 @@ __all__ = [ "IntTypeParser", "ObjectTypeParser", "StringTypeParser", + "RefTypeParser", ] diff --git a/jambo/parser/_type_parser.py b/jambo/parser/_type_parser.py index dbe098a..080965c 100644 --- a/jambo/parser/_type_parser.py +++ b/jambo/parser/_type_parser.py @@ -33,7 +33,7 @@ class GenericTypeParser(ABC, Generic[T]): def from_properties( self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions] - ) -> tuple[type, dict]: + ) -> tuple[T, dict]: """ Converts properties to a type and its fields properties. :param name: The name of the type. 
diff --git a/jambo/parser/object_type_parser.py b/jambo/parser/object_type_parser.py
index 456bc4d..6833d40 100644
--- a/jambo/parser/object_type_parser.py
+++ b/jambo/parser/object_type_parser.py
@@ -1,8 +1,7 @@
 from jambo.parser._type_parser import GenericTypeParser
 from jambo.types.type_parser_options import TypeParserOptions
 
-from pydantic import Field, create_model
-from pydantic.main import ModelT
+from pydantic import BaseModel, Field, create_model
 from typing_extensions import Any, Unpack
 
 
@@ -13,7 +12,7 @@ class ObjectTypeParser(GenericTypeParser):
 
     def from_properties_impl(
         self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions]
-    ):
+    ) -> tuple[type[BaseModel], dict]:
         type_parsing = self.to_model(
             name,
             properties.get("properties", {}),
@@ -29,13 +28,14 @@ class ObjectTypeParser(GenericTypeParser):
 
         return type_parsing, type_properties
 
+    @classmethod
     def to_model(
-        self,
+        cls,
         name: str,
         schema: dict[str, Any],
         required_keys: list[str],
         **kwargs: Unpack[TypeParserOptions],
-    ) -> type[ModelT]:
+    ) -> type[BaseModel]:
         """
         Converts JSON Schema object properties to a Pydantic model.
         :param name: The name of the model.
@@ -43,11 +43,12 @@ class ObjectTypeParser(GenericTypeParser):
         :param required_keys: List of required keys in the schema.
         :return: A Pydantic model class.
         """
-        fields = self._parse_properties(schema, required_keys, **kwargs)
+        fields = cls._parse_properties(schema, required_keys, **kwargs)
         return create_model(name, **fields)
 
-    @staticmethod
+    @classmethod
     def _parse_properties(
+        cls,
         properties: dict[str, Any],
         required_keys: list[str],
         **kwargs: Unpack[TypeParserOptions],
diff --git a/jambo/parser/ref_type_parser.py b/jambo/parser/ref_type_parser.py
new file mode 100644
index 0000000..57abeac
--- /dev/null
+++ b/jambo/parser/ref_type_parser.py
@@ -0,0 +1,157 @@
+from jambo.parser import GenericTypeParser
+from jambo.types.type_parser_options import TypeParserOptions
+
+from typing_extensions import Any, ForwardRef, Literal, TypeVar, Union, Unpack
+
+
+RefType = TypeVar("RefType", bound=Union[type, ForwardRef])
+
+RefStrategy = Literal["forward_ref", "def_ref"]
+
+
+class RefTypeParser(GenericTypeParser):
+    """
+    Resolves `$ref` entries of a JSON Schema into Python types.
+    Only root (`#`) and `#/$defs/...` references are supported.
+    """
+
+    json_schema_type = "$ref"
+
+    def from_properties_impl(
+        self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions]
+    ) -> tuple[RefType, dict]:
+        """
+        Resolves the `$ref` in `properties` to a type, reusing `ref_cache`.
+        :param name: The name of the property holding the reference.
+        :param properties: The schema fragment containing `$ref`.
+        :return: The referenced type (or a ForwardRef) and its field properties.
+        :raises ValueError: If `$ref` is missing or cannot be resolved.
+        :raises RuntimeError: If `context` or `ref_cache` is not provided.
+        """
+        if "$ref" not in properties:
+            raise ValueError(f"RefTypeParser: Missing $ref in properties for {name}")
+
+        context = kwargs.get("context")
+        if context is None:
+            raise RuntimeError(
+                f"RefTypeParser: Missing `context` in properties for {name}"
+            )
+
+        ref_cache = kwargs.get("ref_cache")
+        if ref_cache is None:
+            raise RuntimeError(
+                f"RefTypeParser: Missing `ref_cache` in properties for {name}"
+            )
+
+        mapped_properties = self.mappings_properties_builder(properties, **kwargs)
+
+        ref_strategy, ref_name, ref_property = self._examine_ref_strategy(
+            name, properties, **kwargs
+        )
+
+        ref_state = self._get_ref_from_cache(ref_name, ref_cache)
+        if ref_state is not None:
+            # If the reference is either processing or already cached
+            return ref_state, mapped_properties
+
+        ref_cache[ref_name] = self._parse_from_strategy(
+            ref_strategy, ref_name, ref_property, **kwargs
+        )
+
+        return ref_cache[ref_name], mapped_properties
+
+    def _parse_from_strategy(
+        self,
+        ref_strategy: RefStrategy,
+        ref_name: str,
+        ref_property: dict[str, Any],
+        **kwargs: Unpack[TypeParserOptions],
+    ):
+        """
+        Builds the type for a reference according to its strategy.
+        :raises ValueError: If the strategy is not supported.
+        """
+        match ref_strategy:
+            case "forward_ref":
+                mapped_type = ForwardRef(ref_name)
+            case "def_ref":
+                mapped_type, _ = GenericTypeParser.type_from_properties(
+                    ref_name, ref_property, **kwargs
+                )
+            case _:
+                # `.get` so a target without `$ref` still raises ValueError
+                raise ValueError(
+                    f"RefTypeParser: Unsupported $ref {ref_property.get('$ref')}"
+                )
+
+        return mapped_type
+
+    def _get_ref_from_cache(
+        self, ref_name: str, ref_cache: dict[str, type]
+    ) -> RefType | type | None:
+        """
+        Returns the cached type, or a ForwardRef while it is being processed.
+        On a miss, marks the reference as in progress and returns None.
+        """
+        try:
+            ref_state = ref_cache[ref_name]
+
+            if ref_state is None:
+                # If the reference is being processed, we return a ForwardRef
+                return ForwardRef(ref_name)
+
+            # If the reference is already cached, we return it
+            return ref_state
+        except KeyError:
+            # If the reference is not in the cache, we will set it to None
+            ref_cache[ref_name] = None
+            return None
+
+    def _examine_ref_strategy(
+        self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions]
+    ) -> tuple[RefStrategy, str, dict]:
+        """
+        Classifies the `$ref` and returns (strategy, target name, target schema).
+        :raises ValueError: If the reference is neither root nor `#/$defs/...`.
+        """
+        if properties["$ref"] == "#":
+            ref_name = kwargs["context"].get("title")
+            if ref_name is None:
+                raise ValueError(
+                    "RefTypeParser: Missing title in properties for $ref of Root Reference"
+                )
+            return "forward_ref", ref_name, {}
+
+        if properties["$ref"].startswith("#/$defs/"):
+            target_name, target_property = self._extract_target_ref(
+                name, properties, **kwargs
+            )
+            return "def_ref", target_name, target_property
+
+        raise ValueError(
+            "RefTypeParser: Only Root and $defs references are supported at the moment"
+        )
+
+    def _extract_target_ref(
+        self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions]
+    ) -> tuple[str, dict]:
+        """
+        Walks the `$ref` path through `context` and returns the last path
+        segment together with the schema found there.
+        :raises ValueError: If a path segment is missing or the target is empty.
+        """
+        target_name = None
+        target_property = kwargs["context"]
+        for prop_name in properties["$ref"].split("/")[1:]:
+            if prop_name not in target_property:
+                raise ValueError(
+                    f"RefTypeParser: Missing {prop_name} in"
+                    f" properties for $ref {properties['$ref']}"
+                )
+            target_name = prop_name
+            target_property = target_property[prop_name]
+
+        if target_name is None or target_property is None:
+            raise ValueError(f"RefTypeParser: Invalid $ref {properties['$ref']}")
+
+        return target_name, target_property
diff --git a/jambo/schema_converter.py b/jambo/schema_converter.py
index 0926358..6f9020e 100644
--- a/jambo/schema_converter.py
+++ b/jambo/schema_converter.py
@@ -1,9 +1,9 @@
-from jambo.parser import ObjectTypeParser
+from jambo.parser import ObjectTypeParser, RefTypeParser
 from jambo.types.json_schema_type import JSONSchema
 
 from jsonschema.exceptions import SchemaError
 from jsonschema.validators import validator_for
-from pydantic.main import ModelT
+from pydantic import BaseModel
 
 
 class SchemaConverter:
@@ -16,7 +16,7 @@ class SchemaConverter:
     """
 
     @staticmethod
-    def build(schema: JSONSchema) -> type[ModelT]:
+    def build(schema: JSONSchema) -> type[BaseModel]:
         """
         Converts a JSON Schema to a Pydantic model.
         :param schema: The JSON Schema to convert.
@@ -32,11 +32,37 @@ class SchemaConverter:
         if "title" not in schema:
             raise ValueError("JSON Schema must have a title.")
 
-        if schema["type"] != "object":
-            raise TypeError(
-                f"Invalid JSON Schema: {schema['type']}. Only 'object' can be converted to Pydantic models."
-            )
+        schema_type = SchemaConverter._get_schema_type(schema)
 
-        return ObjectTypeParser().to_model(
-            schema["title"], schema.get("properties"), schema.get("required")
-        )
+        match schema_type:
+            case "object":
+                return ObjectTypeParser.to_model(
+                    schema["title"],
+                    schema.get("properties", {}),
+                    schema.get("required", []),
+                    context=schema,
+                    ref_cache=dict(),
+                )
+
+            case "$ref":
+                parsed_model, _ = RefTypeParser().from_properties(
+                    schema["title"],
+                    schema,
+                    context=schema,
+                    ref_cache=dict(),
+                )
+                return parsed_model
+            case _:
+                raise TypeError(f"Unsupported schema type: {schema_type}")
+
+    @staticmethod
+    def _get_schema_type(schema: JSONSchema) -> str:
+        """
+        Returns the type of the schema.
+        :param schema: The JSON Schema to check.
+        :return: The type of the schema.
+        """
+        if "$ref" in schema:
+            return "$ref"
+
+        return schema.get("type", "undefined")
diff --git a/jambo/types/type_parser_options.py b/jambo/types/type_parser_options.py
index b75490e..4f7d8e0 100644
--- a/jambo/types/type_parser_options.py
+++ b/jambo/types/type_parser_options.py
@@ -1,5 +1,9 @@
+from jambo.types.json_schema_type import JSONSchema
+
 from typing_extensions import TypedDict
 
 
 class TypeParserOptions(TypedDict):
     required: bool
+    context: JSONSchema
+    ref_cache: dict[str, type]
diff --git a/tests/parser/test_ref_type_parser.py b/tests/parser/test_ref_type_parser.py
new file mode 100644
index 0000000..3e08ff4
--- /dev/null
+++ b/tests/parser/test_ref_type_parser.py
@@ -0,0 +1,484 @@
+from jambo.parser import ObjectTypeParser, RefTypeParser
+
+from typing import ForwardRef
+from unittest import TestCase
+
+
+class TestRefTypeParser(TestCase):
+    def test_ref_type_parser_throws_without_ref(self):
+        properties = {
+            "title": "person",
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+                "age": {"type": "integer"},
+            },
+            "required": ["name", "age"],
+        }
+
+        with self.assertRaises(ValueError):
+            RefTypeParser().from_properties(
+                "person",
+                properties,
+                context=properties,
+                ref_cache={},
+                required=True,
+            )
+
+    def test_ref_type_parser_throws_without_context(self):
+        properties = {
+            "title": "person",
+            "$ref": "#/$defs/person",
+            "$defs": {
+                "person": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                    },
+                }
+            },
+        }
+
+        with self.assertRaises(RuntimeError):
+            RefTypeParser().from_properties(
+                "person",
+                properties,
+                ref_cache={},
+                required=True,
+            )
+
+    def test_ref_type_parser_throws_without_ref_cache(self):
+        properties = {
+            "title": "person",
+            "$ref": "#/$defs/person",
+            "$defs": {
+                "person": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                    },
+                }
+            },
+        }
+
+        with self.assertRaises(RuntimeError):
+            RefTypeParser().from_properties(
+                "person",
+                properties,
+                context=properties,
+                required=True,
+            )
+
+    def test_ref_type_parser_throws_if_network_ref_type(self):
+        properties = {
+            "title": "person",
+            "$ref": "https://example.com/schemas/person.json",
+        }
+
+        with self.assertRaises(ValueError):
+            RefTypeParser().from_properties(
+                "person",
+                properties,
+                context=properties,
+                ref_cache={},
+                required=True,
+            )
+
+    def test_ref_type_parser_throws_if_non_root_or_def_ref(self):
+        # This is invalid because object3 is referencing object2,
+        # but object2 is not defined in $defs or as a root reference.
+        properties = {
+            "title": "object1",
+            "type": "object",
+            "properties": {
+                "object2": {
+                    "type": "object",
+                    "properties": {
+                        "attr1": {
+                            "type": "string",
+                        },
+                        "attr2": {
+                            "type": "integer",
+                        },
+                    },
+                },
+                "object3": {
+                    "$ref": "#/$defs/object2",
+                },
+            },
+        }
+
+        with self.assertRaises(ValueError):
+            ObjectTypeParser().from_properties(
+                "person",
+                properties,
+                context=properties,
+                ref_cache={},
+                required=True,
+            )
+
+    def test_ref_type_parser_throws_if_def_doesnt_exists(self):
+        properties = {
+            "title": "person",
+            "$ref": "#/$defs/employee",
+            "$defs": {},
+        }
+
+        with self.assertRaises(ValueError):
+            RefTypeParser().from_properties(
+                "person",
+                properties,
+                context=properties,
+                ref_cache={},
+                required=True,
+            )
+
+    def test_ref_type_parser_throws_if_ref_property_doesnt_exists(self):
+        properties = {
+            "title": "person",
+            "$ref": "#/$defs/person",
+            "$defs": {"person": None},
+        }
+
+        with self.assertRaises(ValueError):
+            RefTypeParser().from_properties(
+                "person",
+                properties,
+                context=properties,
+                ref_cache={},
+                required=True,
+            )
+
+    def test_ref_type_parser_with_def(self):
+        properties = {
+            "title": "person",
+            "$ref": "#/$defs/person",
+            "$defs": {
+                "person": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                    },
+                }
+            },
+        }
+
+        type_parsing, type_validator = RefTypeParser().from_properties(
+            "person",
+            properties,
+            context=properties,
+            ref_cache={},
+            required=True,
+        )
+
+        self.assertIsInstance(type_parsing, type)
+
+        obj = type_parsing(name="John", age=30)
+
+        self.assertEqual(obj.name, "John")
+        self.assertEqual(obj.age, 30)
+
+    def test_ref_type_parser_with_forward_ref(self):
+        properties = {
+            "title": "person",
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+                "age": {"type": "integer"},
+                "emergency_contact": {
+                    "$ref": "#",
+                },
+            },
+            "required": ["name", "age"],
+        }
+
+        model, type_validator = ObjectTypeParser().from_properties(
+            "person",
+            properties,
+            context=properties,
+            ref_cache={},
+            required=True,
+        )
+
+        obj = model(
+            name="John",
+            age=30,
+            emergency_contact=model(
+                name="Jane",
+                age=28,
+            ),
+        )
+
+        self.assertEqual(obj.name, "John")
+        self.assertEqual(obj.age, 30)
+        self.assertIsInstance(obj.emergency_contact, model)
+        self.assertEqual(obj.emergency_contact.name, "Jane")
+        self.assertEqual(obj.emergency_contact.age, 28)
+
+    def test_ref_type_parser_invalid_forward_ref(self):
+        properties = {
+            # Doesn't have a title, which is required for forward references
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+                "age": {"type": "integer"},
+                "emergency_contact": {
+                    "$ref": "#",
+                },
+            },
+            "required": ["name", "age"],
+        }
+
+        with self.assertRaises(ValueError):
+            ObjectTypeParser().from_properties(
+                "person",
+                properties,
+                context=properties,
+                ref_cache={},
+                required=True,
+            )
+
+    def test_ref_type_parser_forward_ref_can_checks_validation(self):
+        properties = {
+            "title": "person",
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+                "age": {"type": "integer"},
+                "emergency_contact": {
+                    "$ref": "#",
+                },
+            },
+            "required": ["name", "age"],
+        }
+
+        model, type_validator = ObjectTypeParser().from_properties(
+            "person",
+            properties,
+            context=properties,
+            ref_cache={},
+            required=True,
+        )
+
+        # Checks that a model created via ForwardRef is still validated correctly.
+        with self.assertRaises(ValueError):
+            model(
+                name="John",
+                age=30,
+                emergency_contact=model(
+                    name="Jane",
+                ),
+            )
+
+    def test_ref_type_parser_with_ciclic_def(self):
+        properties = {
+            "title": "person",
+            "$ref": "#/$defs/person",
+            "$defs": {
+                "person": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                        "emergency_contact": {
+                            "$ref": "#/$defs/person",
+                        },
+                    },
+                }
+            },
+        }
+
+        model, type_validator = RefTypeParser().from_properties(
+            "person",
+            properties,
+            context=properties,
+            ref_cache={},
+            required=True,
+        )
+
+        obj = model(
+            name="John",
+            age=30,
+            emergency_contact=model(
+                name="Jane",
+                age=28,
+            ),
+        )
+
+        self.assertEqual(obj.name, "John")
+        self.assertEqual(obj.age, 30)
+        self.assertIsInstance(obj.emergency_contact, model)
+        self.assertEqual(obj.emergency_contact.name, "Jane")
+        self.assertEqual(obj.emergency_contact.age, 28)
+
+    def test_ref_type_parser_with_repeated_ref(self):
+        properties = {
+            "title": "person",
+            "$ref": "#/$defs/person",
+            "$defs": {
+                "person": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                        "emergency_contact": {
+                            "$ref": "#/$defs/person",
+                        },
+                        "friends": {
+                            "type": "array",
+                            "items": {
+                                "$ref": "#/$defs/person",
+                            },
+                        },
+                    },
+                }
+            },
+        }
+
+        model, type_validator = RefTypeParser().from_properties(
+            "person",
+            properties,
+            context=properties,
+            ref_cache={},
+            required=True,
+        )
+
+        obj = model(
+            name="John",
+            age=30,
+            emergency_contact=model(
+                name="Jane",
+                age=28,
+            ),
+            friends=[
+                model(name="Alice", age=25),
+                model(name="Bob", age=26),
+            ],
+        )
+
+        self.assertEqual(
+            type(obj.emergency_contact),
+            type(obj.friends[0]),
+            "Emergency contact and friends should be of the same type",
+        )
+
+    def test_ref_type_parser_pre_computed_ref_cache(self):
+        ref_cache = {}
+
+        parent_properties = {
+            "$defs": {
+                "person": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                    },
+                }
+            },
+        }
+
+        properties1 = {
+            "title": "person1",
+            "$ref": "#/$defs/person",
+        }
+        model1, _ = RefTypeParser().from_properties(
+            "person",
+            properties1,
+            context=parent_properties,
+            ref_cache=ref_cache,
+            required=True,
+        )
+
+        properties2 = {
+            "title": "person2",
+            "$ref": "#/$defs/person",
+        }
+        model2, _ = RefTypeParser().from_properties(
+            "person",
+            properties2,
+            context=parent_properties,
+            ref_cache=ref_cache,
+            required=True,
+        )
+
+        self.assertIs(model1, model2, "Models should be the same instance")
+
+    def test_parse_from_strategy_invalid_ref_strategy(self):
+        properties = {
+            "title": "person",
+            "$ref": "#/$defs/person",
+            "$defs": {
+                "person": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                    },
+                }
+            },
+        }
+
+        with self.assertRaises(ValueError):
+            ref_strategy, ref_name, ref_property = RefTypeParser()._parse_from_strategy(
+                "invalid_strategy",
+                "person",
+                properties,
+            )
+
+    def test_parse_from_strategy_forward_ref(self):
+        properties = {
+            "title": "person",
+            "$ref": "#/$defs/person",
+            "$defs": {
+                "person": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                    },
+                }
+            },
+        }
+
+        parsed_type = RefTypeParser()._parse_from_strategy(
+            "forward_ref",
+            "person",
+            properties,
+        )
+
+        self.assertIsInstance(parsed_type, ForwardRef)
+
+    def test_parse_from_strategy_def_ref(self):
+        properties = {
+            "title": "person",
+            "$ref": "#/$defs/person",
+            "$defs": {
+                "person": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                    },
+                }
+            },
+        }
+
+        parsed_type = RefTypeParser()._parse_from_strategy(
+            "def_ref",
+            "person",
+            properties,
+            context=properties,
+            ref_cache={},
+            required=True,
+        )
+
+        obj = parsed_type(
+            name="John",
+            age=30,
+        )
+
+        self.assertEqual(obj.name, "John")
+        self.assertEqual(obj.age, 30)
diff --git a/tests/test_schema_converter.py b/tests/test_schema_converter.py
index 7b10b0f..76c38ec 100644
--- a/tests/test_schema_converter.py
+++ b/tests/test_schema_converter.py
@@ -496,3 +496,69 @@ class TestSchemaConverter(TestCase):
         }
         with self.assertRaises(ValueError):
             SchemaConverter.build(schema)
+
+    def test_ref_with_root_ref(self):
+        schema = {
+            "title": "Person",
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+                "age": {"type": "integer"},
+                "emergency_contact": {
+                    "$ref": "#",
+                },
+            },
+            "required": ["name", "age"],
+        }
+
+        model = SchemaConverter.build(schema)
+
+        obj = model(
+            name="John",
+            age=30,
+            emergency_contact=model(
+                name="Jane",
+                age=28,
+            ),
+        )
+
+        self.assertEqual(obj.name, "John")
+        self.assertEqual(obj.age, 30)
+        self.assertIsInstance(obj.emergency_contact, model)
+        self.assertEqual(obj.emergency_contact.name, "Jane")
+        self.assertEqual(obj.emergency_contact.age, 28)
+
+    def test_ref_with_def(self):
+        schema = {
+            "title": "person",
+            "$ref": "#/$defs/person",
+            "$defs": {
+                "person": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                        "emergency_contact": {
+                            "$ref": "#/$defs/person",
+                        },
+                    },
+                }
+            },
+        }
+
+        model = SchemaConverter.build(schema)
+
+        obj = model(
+            name="John",
+            age=30,
+            emergency_contact=model(
+                name="Jane",
+                age=28,
+            ),
+        )
+
+        self.assertEqual(obj.name, "John")
+        self.assertEqual(obj.age, 30)
+        self.assertIsInstance(obj.emergency_contact, model)
+        self.assertEqual(obj.emergency_contact.name, "Jane")
+        self.assertEqual(obj.emergency_contact.age, 28)