From 760f30d08f31a95a0d6f2a40c335c34c8cc5ae20 Mon Sep 17 00:00:00 2001 From: Vitor Hideyoshi Date: Thu, 12 Jun 2025 01:54:52 -0300 Subject: [PATCH 1/6] Initial Implementation of $ref --- jambo/parser/__init__.py | 2 + jambo/parser/ref_type_parser.py | 72 ++++++++++++++++++++++++++++ jambo/types/type_parser_options.py | 4 +- tests/parser/test_ref_type_parser.py | 34 +++++++++++++ 4 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 jambo/parser/ref_type_parser.py create mode 100644 tests/parser/test_ref_type_parser.py diff --git a/jambo/parser/__init__.py b/jambo/parser/__init__.py index 86d8f56..b804339 100644 --- a/jambo/parser/__init__.py +++ b/jambo/parser/__init__.py @@ -9,6 +9,7 @@ from .boolean_type_parser import BooleanTypeParser from .float_type_parser import FloatTypeParser from .int_type_parser import IntTypeParser from .object_type_parser import ObjectTypeParser +from .ref_type_parser import RefTypeParser from .string_type_parser import StringTypeParser @@ -22,4 +23,5 @@ __all__ = [ "IntTypeParser", "ObjectTypeParser", "StringTypeParser", + "RefTypeParser", ] diff --git a/jambo/parser/ref_type_parser.py b/jambo/parser/ref_type_parser.py new file mode 100644 index 0000000..c27f62c --- /dev/null +++ b/jambo/parser/ref_type_parser.py @@ -0,0 +1,72 @@ +from jambo.parser import GenericTypeParser +from jambo.types.type_parser_options import TypeParserOptions + +from typing_extensions import Any, ForwardRef, TypeVar, Union, Unpack + + +RefType = TypeVar("RefType", bound=Union[int, str]) + + +class RefTypeParser(GenericTypeParser): + json_schema_type = "$ref" + + def from_properties_impl( + self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions] + ) -> tuple[RefType, dict]: + if "$ref" not in properties: + raise ValueError(f"RefTypeParser: Missing $ref in properties for {name}") + + context = kwargs["context"] + required = kwargs.get("required", False) + + if context is None: + raise RuntimeError( + f"RefTypeParser: 
Missing $content in properties for {name}" + ) + + if not properties["$ref"].startswith("#"): + raise ValueError( + "At the moment, only local references are supported. " + "Look into $defs and # for recursive references." + ) + + ref_type = None + mapped_properties = {} + + if properties["$ref"] == "#": + if "title" not in context: + raise ValueError( + "RefTypeParser: Missing title in properties for $ref #" + ) + + ref_type = ForwardRef(context["title"]) + + elif properties["$ref"].startswith("#/$defs/"): + target_name = None + target_property = context + for prop_name in properties["$ref"].split("/")[1:]: + if prop_name not in target_property: + raise ValueError( + f"RefTypeParser: Missing {prop_name} in" + " properties for $ref {properties['$ref']}" + ) + target_name = prop_name + target_property = target_property[prop_name] + + if target_name is None or target_property is None: + raise ValueError(f"RefTypeParser: Invalid $ref {properties['$ref']}") + + ref_type, mapped_properties = GenericTypeParser.type_from_properties( + target_name, target_property, **kwargs + ) + + else: + raise ValueError( + "RefTypeParser: Invalid $ref format. " + "Only local references are supported." 
+ ) + + if not required: + mapped_properties["default"] = None + + return ref_type, mapped_properties diff --git a/jambo/types/type_parser_options.py b/jambo/types/type_parser_options.py index b75490e..ae96338 100644 --- a/jambo/types/type_parser_options.py +++ b/jambo/types/type_parser_options.py @@ -1,5 +1,7 @@ -from typing_extensions import TypedDict +from typing_extensions import Any, NotRequired, TypedDict class TypeParserOptions(TypedDict): required: bool + context: dict[str, Any] + ref_cache: NotRequired[dict[str, type]] diff --git a/tests/parser/test_ref_type_parser.py b/tests/parser/test_ref_type_parser.py new file mode 100644 index 0000000..5ab96f5 --- /dev/null +++ b/tests/parser/test_ref_type_parser.py @@ -0,0 +1,34 @@ +from jambo.parser import RefTypeParser + +from unittest import TestCase + + +class TestRefTypeParser(TestCase): + def test_ref_type_parser_local_ref(self): + properties = { + "title": "person", + "$ref": "#/$defs/person", + "$defs": { + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + } + }, + } + + type_parsing, type_validator = RefTypeParser().from_properties( + properties=properties, + name="placeholder", + context=properties, + required=True, + ) + + self.assertIsInstance(type_parsing, type) + + obj = type_parsing(name="John", age=30) + + self.assertEqual(obj.name, "John") + self.assertEqual(obj.age, 30) From 188cd285860be31d255e95508689d6c3f3290056 Mon Sep 17 00:00:00 2001 From: Vitor Hideyoshi Date: Thu, 12 Jun 2025 02:35:09 -0300 Subject: [PATCH 2/6] **BROKEN INITIAL FOWARDREF** --- jambo/parser/ref_type_parser.py | 20 ++-------- jambo/types/type_parser_options.py | 6 ++- tests/parser/test_ref_type_parser.py | 57 ++++++++++++++++++++++++++-- 3 files changed, 61 insertions(+), 22 deletions(-) diff --git a/jambo/parser/ref_type_parser.py b/jambo/parser/ref_type_parser.py index c27f62c..8f7ff7c 100644 --- a/jambo/parser/ref_type_parser.py +++ 
b/jambo/parser/ref_type_parser.py @@ -4,7 +4,7 @@ from jambo.types.type_parser_options import TypeParserOptions from typing_extensions import Any, ForwardRef, TypeVar, Union, Unpack -RefType = TypeVar("RefType", bound=Union[int, str]) +RefType = TypeVar("RefType", bound=Union[type, ForwardRef]) class RefTypeParser(GenericTypeParser): @@ -30,16 +30,13 @@ class RefTypeParser(GenericTypeParser): "Look into $defs and # for recursive references." ) - ref_type = None - mapped_properties = {} - if properties["$ref"] == "#": if "title" not in context: raise ValueError( "RefTypeParser: Missing title in properties for $ref #" ) - ref_type = ForwardRef(context["title"]) + return ForwardRef(context["title"]), {} elif properties["$ref"].startswith("#/$defs/"): target_name = None @@ -56,17 +53,8 @@ class RefTypeParser(GenericTypeParser): if target_name is None or target_property is None: raise ValueError(f"RefTypeParser: Invalid $ref {properties['$ref']}") - ref_type, mapped_properties = GenericTypeParser.type_from_properties( + return GenericTypeParser.type_from_properties( target_name, target_property, **kwargs ) - else: - raise ValueError( - "RefTypeParser: Invalid $ref format. " - "Only local references are supported." 
- ) - - if not required: - mapped_properties["default"] = None - - return ref_type, mapped_properties + raise ValueError(f"RefTypeParser: Unsupported $ref {properties['$ref']}") diff --git a/jambo/types/type_parser_options.py b/jambo/types/type_parser_options.py index ae96338..eb20db2 100644 --- a/jambo/types/type_parser_options.py +++ b/jambo/types/type_parser_options.py @@ -1,7 +1,9 @@ -from typing_extensions import Any, NotRequired, TypedDict +from jambo.types.json_schema_type import JSONSchema + +from typing_extensions import NotRequired, TypedDict class TypeParserOptions(TypedDict): required: bool - context: dict[str, Any] + context: JSONSchema ref_cache: NotRequired[dict[str, type]] diff --git a/tests/parser/test_ref_type_parser.py b/tests/parser/test_ref_type_parser.py index 5ab96f5..2b276bd 100644 --- a/tests/parser/test_ref_type_parser.py +++ b/tests/parser/test_ref_type_parser.py @@ -1,10 +1,12 @@ -from jambo.parser import RefTypeParser +from jambo.parser import ObjectTypeParser, RefTypeParser + +from typing_extensions import ForwardRef, get_type_hints from unittest import TestCase class TestRefTypeParser(TestCase): - def test_ref_type_parser_local_ref(self): + def test_ref_type_parser_with_def(self): properties = { "title": "person", "$ref": "#/$defs/person", @@ -20,8 +22,8 @@ class TestRefTypeParser(TestCase): } type_parsing, type_validator = RefTypeParser().from_properties( - properties=properties, - name="placeholder", + "person", + properties, context=properties, required=True, ) @@ -32,3 +34,50 @@ class TestRefTypeParser(TestCase): self.assertEqual(obj.name, "John") self.assertEqual(obj.age, 30) + + def test_ref_type_parser_with_forward_ref(self): + properties = { + "title": "person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "emergency_contact": { + "$ref": "#", + }, + }, + } + + type_parsing, type_validator = ObjectTypeParser().from_properties( + "person", + properties, + 
context=properties, + required=True, + ) + type_parsing.update_forward_refs(person=type_parsing) + + self.assertIsInstance(type_parsing, type) + + type_hints = get_type_hints(type_parsing, globals(), locals()) + + self.assertIsInstance(type_hints["emergency_contact"], ForwardRef) + + """ + This is a example of how to resolve ForwardRef in a dynamic model: + ```python + from typing import get_type_hints + + # Make sure your dynamic model has a name + model = type_parsing + model.update_forward_refs(person=model) # ๐Ÿ‘ˆ resolve the ForwardRef("person") + + # Inject into globals manually + globalns = globals().copy() + globalns['person'] = model + + # Now you can get the resolved hints + type_hints = get_type_hints(model, globalns=globalns) + ``` + Use `TypeParserOptions.ref_cache` option to cache and resolve ForwardRefs + inside the ObjectTypeParser.to_model method. + """ From f4effac41cd98566fa4efb1dd1b2d7ede7284bf5 Mon Sep 17 00:00:00 2001 From: Vitor Hideyoshi Date: Fri, 13 Jun 2025 01:36:16 -0300 Subject: [PATCH 3/6] Initial Working $ref Keyword with: ForwardRef, Partial Root Ref and Recursive Ref --- jambo/parser/_type_parser.py | 2 +- jambo/parser/object_type_parser.py | 15 ++-- jambo/parser/ref_type_parser.py | 86 +++++++++++++++----- jambo/schema_converter.py | 23 ++++-- jambo/types/type_parser_options.py | 4 +- tests/parser/test_ref_type_parser.py | 114 +++++++++++++++++++++------ tests/test_schema_converter.py | 53 +++++++++++++ 7 files changed, 234 insertions(+), 63 deletions(-) diff --git a/jambo/parser/_type_parser.py b/jambo/parser/_type_parser.py index dbe098a..080965c 100644 --- a/jambo/parser/_type_parser.py +++ b/jambo/parser/_type_parser.py @@ -33,7 +33,7 @@ class GenericTypeParser(ABC, Generic[T]): def from_properties( self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions] - ) -> tuple[type, dict]: + ) -> tuple[T, dict]: """ Converts properties to a type and its fields properties. :param name: The name of the type. 
diff --git a/jambo/parser/object_type_parser.py b/jambo/parser/object_type_parser.py index 456bc4d..6833d40 100644 --- a/jambo/parser/object_type_parser.py +++ b/jambo/parser/object_type_parser.py @@ -1,8 +1,7 @@ from jambo.parser._type_parser import GenericTypeParser from jambo.types.type_parser_options import TypeParserOptions -from pydantic import Field, create_model -from pydantic.main import ModelT +from pydantic import BaseModel, Field, create_model from typing_extensions import Any, Unpack @@ -13,7 +12,7 @@ class ObjectTypeParser(GenericTypeParser): def from_properties_impl( self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions] - ): + ) -> tuple[type[BaseModel], dict]: type_parsing = self.to_model( name, properties.get("properties", {}), @@ -29,13 +28,14 @@ class ObjectTypeParser(GenericTypeParser): return type_parsing, type_properties + @classmethod def to_model( - self, + cls, name: str, schema: dict[str, Any], required_keys: list[str], **kwargs: Unpack[TypeParserOptions], - ) -> type[ModelT]: + ) -> type[BaseModel]: """ Converts JSON Schema object properties to a Pydantic model. :param name: The name of the model. @@ -43,11 +43,12 @@ class ObjectTypeParser(GenericTypeParser): :param required_keys: List of required keys in the schema. :return: A Pydantic model class. 
""" - fields = self._parse_properties(schema, required_keys, **kwargs) + fields = cls._parse_properties(schema, required_keys, **kwargs) return create_model(name, **fields) - @staticmethod + @classmethod def _parse_properties( + cls, properties: dict[str, Any], required_keys: list[str], **kwargs: Unpack[TypeParserOptions], diff --git a/jambo/parser/ref_type_parser.py b/jambo/parser/ref_type_parser.py index 8f7ff7c..5229f0e 100644 --- a/jambo/parser/ref_type_parser.py +++ b/jambo/parser/ref_type_parser.py @@ -1,7 +1,7 @@ from jambo.parser import GenericTypeParser from jambo.types.type_parser_options import TypeParserOptions -from typing_extensions import Any, ForwardRef, TypeVar, Union, Unpack +from typing_extensions import Any, ForwardRef, Literal, TypeVar, Union, Unpack RefType = TypeVar("RefType", bound=Union[type, ForwardRef]) @@ -17,7 +17,10 @@ class RefTypeParser(GenericTypeParser): raise ValueError(f"RefTypeParser: Missing $ref in properties for {name}") context = kwargs["context"] - required = kwargs.get("required", False) + ref_cache = kwargs["ref_cache"] + + mapped_type = None + mapped_properties = self.mappings_properties_builder(properties, **kwargs) if context is None: raise RuntimeError( @@ -30,31 +33,72 @@ class RefTypeParser(GenericTypeParser): "Look into $defs and # for recursive references." ) - if properties["$ref"] == "#": - if "title" not in context: + ref_strategy, ref_name, ref_property = self._examine_ref_strategy( + name, properties, **kwargs + ) + + # In this code ellipsis is used to indicate that the reference is still being processed, + # If the reference is already in the cache, return it. 
+ ref_state = ref_cache.setdefault(ref_name) + + if ref_state is Ellipsis: + return ForwardRef(ref_name), mapped_properties + elif ref_state is not None: + return ref_state, mapped_properties + else: + ref_cache[ref_name] = Ellipsis + + match ref_strategy: + case "forward_ref": + mapped_type = ForwardRef(ref_name) + case "def_ref": + mapped_type, _ = GenericTypeParser.type_from_properties( + ref_name, ref_property, **kwargs + ) + case _: raise ValueError( - "RefTypeParser: Missing title in properties for $ref #" + f"RefTypeParser: Unsupported $ref {properties['$ref']}" ) - return ForwardRef(context["title"]), {} + # Sets cached reference to the mapped type + ref_cache[ref_name] = mapped_type - elif properties["$ref"].startswith("#/$defs/"): - target_name = None - target_property = context - for prop_name in properties["$ref"].split("/")[1:]: - if prop_name not in target_property: - raise ValueError( - f"RefTypeParser: Missing {prop_name} in" - " properties for $ref {properties['$ref']}" - ) - target_name = prop_name - target_property = target_property[prop_name] + return mapped_type, mapped_properties - if target_name is None or target_property is None: - raise ValueError(f"RefTypeParser: Invalid $ref {properties['$ref']}") + def _examine_ref_strategy( + self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions] + ) -> tuple[Literal["forward_ref", "def_ref"], str, dict]: + if properties["$ref"] == "#": + ref_name = kwargs["context"].get("title") + if ref_name is None: + raise ValueError( + f"RefTypeParser: Missing title in properties for $ref {properties['$ref']}" + ) + return "forward_ref", ref_name, {} - return GenericTypeParser.type_from_properties( - target_name, target_property, **kwargs + if properties["$ref"].startswith("#/$defs/"): + target_name, target_property = self._extract_target_ref( + name, properties, **kwargs ) + return "def_ref", target_name, target_property raise ValueError(f"RefTypeParser: Unsupported $ref 
{properties['$ref']}") + + def _extract_target_ref( + self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions] + ) -> tuple[str, dict]: + target_name = None + target_property = kwargs["context"] + for prop_name in properties["$ref"].split("/")[1:]: + if prop_name not in target_property: + raise ValueError( + f"RefTypeParser: Missing {prop_name} in" + " properties for $ref {properties['$ref']}" + ) + target_name = prop_name + target_property = target_property[prop_name] + + if target_name is None or target_property is None: + raise ValueError(f"RefTypeParser: Invalid $ref {properties['$ref']}") + + return target_name, target_property diff --git a/jambo/schema_converter.py b/jambo/schema_converter.py index 0926358..dfd7f1d 100644 --- a/jambo/schema_converter.py +++ b/jambo/schema_converter.py @@ -3,7 +3,7 @@ from jambo.types.json_schema_type import JSONSchema from jsonschema.exceptions import SchemaError from jsonschema.validators import validator_for -from pydantic.main import ModelT +from pydantic import BaseModel class SchemaConverter: @@ -16,7 +16,7 @@ class SchemaConverter: """ @staticmethod - def build(schema: JSONSchema) -> type[ModelT]: + def build(schema: JSONSchema) -> type[BaseModel]: """ Converts a JSON Schema to a Pydantic model. :param schema: The JSON Schema to convert. @@ -32,11 +32,22 @@ class SchemaConverter: if "title" not in schema: raise ValueError("JSON Schema must have a title.") - if schema["type"] != "object": + if (schema_type := schema.get("type", "undefined")) != "object": raise TypeError( - f"Invalid JSON Schema: {schema['type']}. Only 'object' can be converted to Pydantic models." + f"Invalid JSON Schema: {schema_type}. Only 'object' can be converted to Pydantic models." 
) - return ObjectTypeParser().to_model( - schema["title"], schema.get("properties"), schema.get("required") + parsed_model = ObjectTypeParser.to_model( + schema["title"], + schema.get("properties"), + schema.get("required"), + context=schema, + ref_cache=dict(), ) + + if not issubclass(parsed_model, BaseModel): + raise TypeError( + f"Parsed model {parsed_model.__name__} is not a subclass of BaseModel." + ) + + return parsed_model diff --git a/jambo/types/type_parser_options.py b/jambo/types/type_parser_options.py index eb20db2..4f7d8e0 100644 --- a/jambo/types/type_parser_options.py +++ b/jambo/types/type_parser_options.py @@ -1,9 +1,9 @@ from jambo.types.json_schema_type import JSONSchema -from typing_extensions import NotRequired, TypedDict +from typing_extensions import TypedDict class TypeParserOptions(TypedDict): required: bool context: JSONSchema - ref_cache: NotRequired[dict[str, type]] + ref_cache: dict[str, type] diff --git a/tests/parser/test_ref_type_parser.py b/tests/parser/test_ref_type_parser.py index 2b276bd..c6ad1c3 100644 --- a/tests/parser/test_ref_type_parser.py +++ b/tests/parser/test_ref_type_parser.py @@ -1,7 +1,5 @@ from jambo.parser import ObjectTypeParser, RefTypeParser -from typing_extensions import ForwardRef, get_type_hints - from unittest import TestCase @@ -25,6 +23,7 @@ class TestRefTypeParser(TestCase): "person", properties, context=properties, + ref_cache={}, required=True, ) @@ -46,38 +45,101 @@ class TestRefTypeParser(TestCase): "$ref": "#", }, }, + "required": ["name", "age"], } - type_parsing, type_validator = ObjectTypeParser().from_properties( + model, type_validator = ObjectTypeParser().from_properties( "person", properties, context=properties, + ref_cache={}, required=True, ) - type_parsing.update_forward_refs(person=type_parsing) - self.assertIsInstance(type_parsing, type) + obj = model( + name="John", + age=30, + emergency_contact=model( + name="Jane", + age=28, + ), + ) - type_hints = get_type_hints(type_parsing, 
globals(), locals()) + self.assertEqual(obj.name, "John") + self.assertEqual(obj.age, 30) + self.assertIsInstance(obj.emergency_contact, model) + self.assertEqual(obj.emergency_contact.name, "Jane") + self.assertEqual(obj.emergency_contact.age, 28) - self.assertIsInstance(type_hints["emergency_contact"], ForwardRef) + def test_ref_type_parser_forward_ref_can_checks_validation(self): + properties = { + "title": "person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "emergency_contact": { + "$ref": "#", + }, + }, + "required": ["name", "age"], + } - """ - This is a example of how to resolve ForwardRef in a dynamic model: - ```python - from typing import get_type_hints - - # Make sure your dynamic model has a name - model = type_parsing - model.update_forward_refs(person=model) # ๐Ÿ‘ˆ resolve the ForwardRef("person") - - # Inject into globals manually - globalns = globals().copy() - globalns['person'] = model - - # Now you can get the resolved hints - type_hints = get_type_hints(model, globalns=globalns) - ``` - Use `TypeParserOptions.ref_cache` option to cache and resolve ForwardRefs - inside the ObjectTypeParser.to_model method. - """ + model, type_validator = ObjectTypeParser().from_properties( + "person", + properties, + context=properties, + ref_cache={}, + required=True, + ) + + # checks if when created via FowardRef the model is validated correctly. 
+ with self.assertRaises(ValueError): + model( + name="John", + age=30, + emergency_contact=model( + name="Jane", + ), + ) + + def test_ref_type_parser_with_ciclic_def(self): + properties = { + "title": "person", + "$ref": "#/$defs/person", + "$defs": { + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "emergency_contact": { + "$ref": "#/$defs/person", + }, + }, + } + }, + } + + model, type_validator = RefTypeParser().from_properties( + "person", + properties, + context=properties, + ref_cache={}, + required=True, + ) + + obj = model( + name="John", + age=30, + emergency_contact=model( + name="Jane", + age=28, + ), + ) + + self.assertEqual(obj.name, "John") + self.assertEqual(obj.age, 30) + self.assertIsInstance(obj.emergency_contact, model) + self.assertEqual(obj.emergency_contact.name, "Jane") + self.assertEqual(obj.emergency_contact.age, 28) diff --git a/tests/test_schema_converter.py b/tests/test_schema_converter.py index 7b10b0f..c3fe409 100644 --- a/tests/test_schema_converter.py +++ b/tests/test_schema_converter.py @@ -496,3 +496,56 @@ class TestSchemaConverter(TestCase): } with self.assertRaises(ValueError): SchemaConverter.build(schema) + + def test_ref_with_root_ref(self): + schema = { + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "emergency_contact": { + "$ref": "#", + }, + }, + "required": ["name", "age"], + } + + model = SchemaConverter.build(schema) + + obj = model( + name="John", + age=30, + emergency_contact=model( + name="Jane", + age=28, + ), + ) + + self.assertEqual(obj.name, "John") + self.assertEqual(obj.age, 30) + self.assertIsInstance(obj.emergency_contact, model) + self.assertEqual(obj.emergency_contact.name, "Jane") + self.assertEqual(obj.emergency_contact.age, 28) + + def test_ref_with_def(self): + schema = { + "title": "person", + "$ref": "#/$defs/person", + "$defs": { + "person": { + "type": "object", 
+ "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "emergency_contact": { + "$ref": "#/$defs/person", + }, + }, + } + }, + } + + with self.assertRaises(TypeError): + # This should raise TypeError because the root schema is not an object + SchemaConverter.build(schema) From 37cf59078e85c8c3771356b5fadb4ae887b93282 Mon Sep 17 00:00:00 2001 From: Vitor Hideyoshi Date: Fri, 13 Jun 2025 01:48:28 -0300 Subject: [PATCH 4/6] Working Version of Root Level Reference --- jambo/schema_converter.py | 59 +++++++++++++++++++++++++++------- tests/test_schema_converter.py | 19 +++++++++-- 2 files changed, 63 insertions(+), 15 deletions(-) diff --git a/jambo/schema_converter.py b/jambo/schema_converter.py index dfd7f1d..3bca9c5 100644 --- a/jambo/schema_converter.py +++ b/jambo/schema_converter.py @@ -1,4 +1,4 @@ -from jambo.parser import ObjectTypeParser +from jambo.parser import ObjectTypeParser, RefTypeParser from jambo.types.json_schema_type import JSONSchema from jsonschema.exceptions import SchemaError @@ -32,18 +32,22 @@ class SchemaConverter: if "title" not in schema: raise ValueError("JSON Schema must have a title.") - if (schema_type := schema.get("type", "undefined")) != "object": - raise TypeError( - f"Invalid JSON Schema: {schema_type}. Only 'object' can be converted to Pydantic models." 
- ) + schema_type = SchemaConverter._get_schema_type(schema) - parsed_model = ObjectTypeParser.to_model( - schema["title"], - schema.get("properties"), - schema.get("required"), - context=schema, - ref_cache=dict(), - ) + parsed_model = None + match schema_type: + case "object": + parsed_model = SchemaConverter._from_object(schema) + case "$ref": + parsed_model, _ = RefTypeParser().from_properties( + schema["title"], + schema, + context=schema, + ref_cache=dict(), + required=True, + ) + case _: + raise TypeError(f"Unsupported schema type: {schema_type}") if not issubclass(parsed_model, BaseModel): raise TypeError( @@ -51,3 +55,34 @@ class SchemaConverter: ) return parsed_model + + @staticmethod + def _from_object(schema: JSONSchema) -> type[BaseModel]: + """ + Converts a JSON Schema object to a Pydantic model. + :param schema: The JSON Schema object to convert. + :return: A Pydantic model class. + """ + + if "properties" not in schema: + raise ValueError("JSON Schema object must have properties defined.") + + return ObjectTypeParser.to_model( + schema["title"], + schema["properties"], + schema.get("required", []), + context=schema, + ref_cache=dict(), + ) + + @staticmethod + def _get_schema_type(schema: JSONSchema) -> str: + """ + Returns the type of the schema. + :param schema: The JSON Schema to check. + :return: The type of the schema. 
+ """ + if "$ref" in schema: + return "$ref" + + return schema.get("type", "undefined") diff --git a/tests/test_schema_converter.py b/tests/test_schema_converter.py index c3fe409..76c38ec 100644 --- a/tests/test_schema_converter.py +++ b/tests/test_schema_converter.py @@ -546,6 +546,19 @@ class TestSchemaConverter(TestCase): }, } - with self.assertRaises(TypeError): - # This should raise TypeError because the root schema is not an object - SchemaConverter.build(schema) + model = SchemaConverter.build(schema) + + obj = model( + name="John", + age=30, + emergency_contact=model( + name="Jane", + age=28, + ), + ) + + self.assertEqual(obj.name, "John") + self.assertEqual(obj.age, 30) + self.assertIsInstance(obj.emergency_contact, model) + self.assertEqual(obj.emergency_contact.name, "Jane") + self.assertEqual(obj.emergency_contact.age, 28) From 607555898e644718169131205fabcdc98ce17ce2 Mon Sep 17 00:00:00 2001 From: Vitor Hideyoshi Date: Thu, 19 Jun 2025 00:39:54 -0300 Subject: [PATCH 5/6] Final and Tested Version of Ref --- jambo/parser/ref_type_parser.py | 75 +++--- jambo/schema_converter.py | 37 +-- tests/parser/test_ref_type_parser.py | 339 +++++++++++++++++++++++++++ 3 files changed, 396 insertions(+), 55 deletions(-) diff --git a/jambo/parser/ref_type_parser.py b/jambo/parser/ref_type_parser.py index 5229f0e..57abeac 100644 --- a/jambo/parser/ref_type_parser.py +++ b/jambo/parser/ref_type_parser.py @@ -6,6 +6,8 @@ from typing_extensions import Any, ForwardRef, Literal, TypeVar, Union, Unpack RefType = TypeVar("RefType", bound=Union[type, ForwardRef]) +RefStrategy = Literal["forward_ref", "def_ref"] + class RefTypeParser(GenericTypeParser): json_schema_type = "$ref" @@ -16,38 +18,42 @@ class RefTypeParser(GenericTypeParser): if "$ref" not in properties: raise ValueError(f"RefTypeParser: Missing $ref in properties for {name}") - context = kwargs["context"] - ref_cache = kwargs["ref_cache"] - - mapped_type = None - mapped_properties = 
self.mappings_properties_builder(properties, **kwargs) - + context = kwargs.get("context") if context is None: raise RuntimeError( - f"RefTypeParser: Missing $content in properties for {name}" + f"RefTypeParser: Missing `content` in properties for {name}" ) - if not properties["$ref"].startswith("#"): - raise ValueError( - "At the moment, only local references are supported. " - "Look into $defs and # for recursive references." + ref_cache = kwargs.get("ref_cache") + if ref_cache is None: + raise RuntimeError( + f"RefTypeParser: Missing `ref_cache` in properties for {name}" ) + mapped_properties = self.mappings_properties_builder(properties, **kwargs) + ref_strategy, ref_name, ref_property = self._examine_ref_strategy( name, properties, **kwargs ) - # In this code ellipsis is used to indicate that the reference is still being processed, - # If the reference is already in the cache, return it. - ref_state = ref_cache.setdefault(ref_name) - - if ref_state is Ellipsis: - return ForwardRef(ref_name), mapped_properties - elif ref_state is not None: + ref_state = self._get_ref_from_cache(ref_name, ref_cache) + if ref_state is not None: + # If the reference is either processing or already cached return ref_state, mapped_properties - else: - ref_cache[ref_name] = Ellipsis + ref_cache[ref_name] = self._parse_from_strategy( + ref_strategy, ref_name, ref_property, **kwargs + ) + + return ref_cache[ref_name], mapped_properties + + def _parse_from_strategy( + self, + ref_strategy: RefStrategy, + ref_name: str, + ref_property: dict[str, Any], + **kwargs: Unpack[TypeParserOptions], + ): match ref_strategy: case "forward_ref": mapped_type = ForwardRef(ref_name) @@ -57,22 +63,35 @@ class RefTypeParser(GenericTypeParser): ) case _: raise ValueError( - f"RefTypeParser: Unsupported $ref {properties['$ref']}" + f"RefTypeParser: Unsupported $ref {ref_property['$ref']}" ) - # Sets cached reference to the mapped type - ref_cache[ref_name] = mapped_type + return mapped_type - return 
mapped_type, mapped_properties + def _get_ref_from_cache( + self, ref_name: str, ref_cache: dict[str, type] + ) -> RefType | type | None: + try: + ref_state = ref_cache[ref_name] + + if ref_state is None: + # If the reference is being processed, we return a ForwardRef + return ForwardRef(ref_name) + + # If the reference is already cached, we return it + return ref_state + except KeyError: + # If the reference is not in the cache, we will set it to None + ref_cache[ref_name] = None def _examine_ref_strategy( self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions] - ) -> tuple[Literal["forward_ref", "def_ref"], str, dict]: + ) -> tuple[RefStrategy, str, dict] | None: if properties["$ref"] == "#": ref_name = kwargs["context"].get("title") if ref_name is None: raise ValueError( - f"RefTypeParser: Missing title in properties for $ref {properties['$ref']}" + "RefTypeParser: Missing title in properties for $ref of Root Reference" ) return "forward_ref", ref_name, {} @@ -82,7 +101,9 @@ class RefTypeParser(GenericTypeParser): ) return "def_ref", target_name, target_property - raise ValueError(f"RefTypeParser: Unsupported $ref {properties['$ref']}") + raise ValueError( + "RefTypeParser: Only Root and $defs references are supported at the moment" + ) def _extract_target_ref( self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions] diff --git a/jambo/schema_converter.py b/jambo/schema_converter.py index 3bca9c5..952cdc2 100644 --- a/jambo/schema_converter.py +++ b/jambo/schema_converter.py @@ -34,10 +34,16 @@ class SchemaConverter: schema_type = SchemaConverter._get_schema_type(schema) - parsed_model = None match schema_type: case "object": - parsed_model = SchemaConverter._from_object(schema) + return ObjectTypeParser.to_model( + schema["title"], + schema["properties"], + schema.get("required", []), + context=schema, + ref_cache=dict(), + ) + case "$ref": parsed_model, _ = RefTypeParser().from_properties( schema["title"], @@ 
-46,35 +52,10 @@ class SchemaConverter: ref_cache=dict(), required=True, ) + return parsed_model case _: raise TypeError(f"Unsupported schema type: {schema_type}") - if not issubclass(parsed_model, BaseModel): - raise TypeError( - f"Parsed model {parsed_model.__name__} is not a subclass of BaseModel." - ) - - return parsed_model - - @staticmethod - def _from_object(schema: JSONSchema) -> type[BaseModel]: - """ - Converts a JSON Schema object to a Pydantic model. - :param schema: The JSON Schema object to convert. - :return: A Pydantic model class. - """ - - if "properties" not in schema: - raise ValueError("JSON Schema object must have properties defined.") - - return ObjectTypeParser.to_model( - schema["title"], - schema["properties"], - schema.get("required", []), - context=schema, - ref_cache=dict(), - ) - @staticmethod def _get_schema_type(schema: JSONSchema) -> str: """ diff --git a/tests/parser/test_ref_type_parser.py b/tests/parser/test_ref_type_parser.py index c6ad1c3..3e08ff4 100644 --- a/tests/parser/test_ref_type_parser.py +++ b/tests/parser/test_ref_type_parser.py @@ -1,9 +1,156 @@ from jambo.parser import ObjectTypeParser, RefTypeParser +from typing import ForwardRef from unittest import TestCase class TestRefTypeParser(TestCase): + def test_ref_type_parser_throws_without_ref(self): + properties = { + "title": "person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + "required": ["name", "age"], + } + + with self.assertRaises(ValueError): + RefTypeParser().from_properties( + "person", + properties, + context=properties, + ref_cache={}, + required=True, + ) + + def test_ref_type_parser_throws_without_context(self): + properties = { + "title": "person", + "$ref": "#/$defs/person", + "$defs": { + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + } + }, + } + + with self.assertRaises(RuntimeError): + RefTypeParser().from_properties( + 
"person", + properties, + ref_cache={}, + required=True, + ) + + def test_ref_type_parser_throws_without_ref_cache(self): + properties = { + "title": "person", + "$ref": "#/$defs/person", + "$defs": { + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + } + }, + } + + with self.assertRaises(RuntimeError): + RefTypeParser().from_properties( + "person", + properties, + context=properties, + required=True, + ) + + def test_ref_type_parser_throws_if_network_ref_type(self): + properties = { + "title": "person", + "$ref": "https://example.com/schemas/person.json", + } + + with self.assertRaises(ValueError): + RefTypeParser().from_properties( + "person", + properties, + context=properties, + ref_cache={}, + required=True, + ) + + def test_ref_type_parser_throws_if_non_root_or_def_ref(self): + # This is invalid because object3 is referencing object2, + # but object2 is not defined in $defs or as a root reference. + properties = { + "title": "object1", + "type": "object", + "properties": { + "object2": { + "type": "object", + "properties": { + "attr1": { + "type": "string", + }, + "attr2": { + "type": "integer", + }, + }, + }, + "object3": { + "$ref": "#/$defs/object2", + }, + }, + } + + with self.assertRaises(ValueError): + ObjectTypeParser().from_properties( + "person", + properties, + context=properties, + ref_cache={}, + required=True, + ) + + def test_ref_type_parser_throws_if_def_doesnt_exists(self): + properties = { + "title": "person", + "$ref": "#/$defs/employee", + "$defs": {}, + } + + with self.assertRaises(ValueError): + RefTypeParser().from_properties( + "person", + properties, + context=properties, + ref_cache={}, + required=True, + ) + + def test_ref_type_parser_throws_if_ref_property_doesnt_exists(self): + properties = { + "title": "person", + "$ref": "#/$defs/person", + "$defs": {"person": None}, + } + + with self.assertRaises(ValueError): + RefTypeParser().from_properties( + "person", + 
properties, + context=properties, + ref_cache={}, + required=True, + ) + def test_ref_type_parser_with_def(self): properties = { "title": "person", @@ -71,6 +218,29 @@ class TestRefTypeParser(TestCase): self.assertEqual(obj.emergency_contact.name, "Jane") self.assertEqual(obj.emergency_contact.age, 28) + def test_ref_type_parser_invalid_forward_ref(self): + properties = { + # Doesn't have a title, which is required for forward references + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "emergency_contact": { + "$ref": "#", + }, + }, + "required": ["name", "age"], + } + + with self.assertRaises(ValueError): + ObjectTypeParser().from_properties( + "person", + properties, + context=properties, + ref_cache={}, + required=True, + ) + def test_ref_type_parser_forward_ref_can_checks_validation(self): properties = { "title": "person", @@ -143,3 +313,172 @@ class TestRefTypeParser(TestCase): self.assertIsInstance(obj.emergency_contact, model) self.assertEqual(obj.emergency_contact.name, "Jane") self.assertEqual(obj.emergency_contact.age, 28) + + def test_ref_type_parser_with_repeated_ref(self): + properties = { + "title": "person", + "$ref": "#/$defs/person", + "$defs": { + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "emergency_contact": { + "$ref": "#/$defs/person", + }, + "friends": { + "type": "array", + "items": { + "$ref": "#/$defs/person", + }, + }, + }, + } + }, + } + + model, type_validator = RefTypeParser().from_properties( + "person", + properties, + context=properties, + ref_cache={}, + required=True, + ) + + obj = model( + name="John", + age=30, + emergency_contact=model( + name="Jane", + age=28, + ), + friends=[ + model(name="Alice", age=25), + model(name="Bob", age=26), + ], + ) + + self.assertEqual( + type(obj.emergency_contact), + type(obj.friends[0]), + "Emergency contact and friends should be of the same type", + ) + + def 
test_ref_type_parser_pre_computed_ref_cache(self): + ref_cache = {} + + parent_properties = { + "$defs": { + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + } + }, + } + + properties1 = { + "title": "person1", + "$ref": "#/$defs/person", + } + model1, _ = RefTypeParser().from_properties( + "person", + properties1, + context=parent_properties, + ref_cache=ref_cache, + required=True, + ) + + properties2 = { + "title": "person2", + "$ref": "#/$defs/person", + } + model2, _ = RefTypeParser().from_properties( + "person", + properties2, + context=parent_properties, + ref_cache=ref_cache, + required=True, + ) + + self.assertIs(model1, model2, "Models should be the same instance") + + def test_parse_from_strategy_invalid_ref_strategy(self): + properties = { + "title": "person", + "$ref": "#/$defs/person", + "$defs": { + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + } + }, + } + + with self.assertRaises(ValueError): + ref_strategy, ref_name, ref_property = RefTypeParser()._parse_from_strategy( + "invalid_strategy", + "person", + properties, + ) + + def test_parse_from_strategy_forward_ref(self): + properties = { + "title": "person", + "$ref": "#/$defs/person", + "$defs": { + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + } + }, + } + + parsed_type = RefTypeParser()._parse_from_strategy( + "forward_ref", + "person", + properties, + ) + + self.assertIsInstance(parsed_type, ForwardRef) + + def test_parse_from_strategy_def_ref(self): + properties = { + "title": "person", + "$ref": "#/$defs/person", + "$defs": { + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + } + }, + } + + parsed_type = RefTypeParser()._parse_from_strategy( + "def_ref", + "person", + properties, + context=properties, + ref_cache={}, + 
required=True, + ) + + obj = parsed_type( + name="John", + age=30, + ) + + self.assertEqual(obj.name, "John") + self.assertEqual(obj.age, 30) From 58d4cd9707ec714a98769775f189cb793d27fb66 Mon Sep 17 00:00:00 2001 From: Vitor Hideyoshi Date: Thu, 19 Jun 2025 22:03:28 -0300 Subject: [PATCH 6/6] Adds Feature Example of the New Feature to the ReadMe --- README.md | 57 ++++++++++++++++++++++++++++++++++----- jambo/schema_converter.py | 1 - 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 8404244..8dba275 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,10 @@ Created to simplifying the process of dynamically generating Pydantic models for ## ✨ Features -- ✅ Convert JSON Schema into Pydantic models dynamically -- 🔒 Supports validation for strings, integers, floats, booleans, arrays, and nested objects -- ⚙️ Enforces constraints like `minLength`, `maxLength`, `pattern`, `minimum`, `maximum`, `uniqueItems`, and more -- 📦 Zero config — just pass your schema and get a model +- ✅ Convert JSON Schema into Pydantic models dynamically; +- 🔒 Supports validation for strings, integers, floats, booleans, arrays, nested objects, allOf, anyOf and ref; +- ⚙️ Enforces constraints like `minLength`, `maxLength`, `pattern`, `minimum`, `maximum`, `uniqueItems`, and more; +- 📦 Zero config — just pass your schema and get a model. 
--- @@ -45,7 +45,8 @@ pip install jambo ## 🚀 Usage ```python -from jambo.schema_converter import SchemaConverter +from jambo import SchemaConverter + schema = { "title": "Person", @@ -70,6 +71,9 @@ print(obj) ### Strings with constraints ```python +from jambo import SchemaConverter + + schema = { "title": "EmailExample", "type": "object", @@ -92,6 +96,9 @@ print(obj) ### Integers with bounds ```python +from jambo import SchemaConverter + + schema = { "title": "AgeExample", "type": "object", @@ -109,6 +116,9 @@ print(obj) ### Nested Objects ```python +from jambo import SchemaConverter + + schema = { "title": "NestedObjectExample", "type": "object", @@ -130,6 +140,41 @@ obj = Model(address={"street": "Main St", "city": "Gotham"}) print(obj) ``` +### References + +```python +from jambo import SchemaConverter + + +schema = { + "title": "person", + "$ref": "#/$defs/person", + "$defs": { + "person": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "emergency_contact": { + "$ref": "#/$defs/person", + }, + }, + } + }, +} + +model = SchemaConverter.build(schema) + +obj = model( + name="John", + age=30, + emergency_contact=model( + name="Jane", + age=28, + ), +) +``` + --- ## 🧪 Running Tests @@ -171,8 +216,6 @@ poe create-hooks ## 📌 Roadmap / TODO - [ ] Support for `enum` and `const` -- [ ] Support for `anyOf`, `allOf`, `oneOf` -- [ ] Schema ref (`$ref`) resolution - [ ] Better error reporting for unsupported schema types --- diff --git a/jambo/schema_converter.py b/jambo/schema_converter.py index 952cdc2..6f9020e 100644 --- a/jambo/schema_converter.py +++ b/jambo/schema_converter.py @@ -50,7 +50,6 @@ class SchemaConverter: schema, context=schema, ref_cache=dict(), - required=True, ) return parsed_model case _: