Merge pull request #20 from HideyoshiNakazone/feature/ref-type-parser

[FEATURE] Implementation of $ref JSON Schema Keyword
This commit was merged in pull request #20.
This commit is contained in:
2025-06-19 22:09:11 -03:00
committed by GitHub
9 changed files with 776 additions and 25 deletions

View File

@@ -27,10 +27,10 @@ Created to simplifying the process of dynamically generating Pydantic models for
## ✨ Features
- ✅ Convert JSON Schema into Pydantic models dynamically
- 🔒 Supports validation for strings, integers, floats, booleans, arrays, and nested objects
- ⚙️ Enforces constraints like `minLength`, `maxLength`, `pattern`, `minimum`, `maximum`, `uniqueItems`, and more
- 📦 Zero config — just pass your schema and get a model
- ✅ Convert JSON Schema into Pydantic models dynamically;
- 🔒 Supports validation for strings, integers, floats, booleans, arrays, nested objects, `allOf`, `anyOf`, and `$ref`;
- ⚙️ Enforces constraints like `minLength`, `maxLength`, `pattern`, `minimum`, `maximum`, `uniqueItems`, and more;
- 📦 Zero config — just pass your schema and get a model.
---
@@ -45,7 +45,8 @@ pip install jambo
## 🚀 Usage
```python
from jambo.schema_converter import SchemaConverter
from jambo import SchemaConverter
schema = {
"title": "Person",
@@ -70,6 +71,9 @@ print(obj)
### Strings with constraints
```python
from jambo import SchemaConverter
schema = {
"title": "EmailExample",
"type": "object",
@@ -92,6 +96,9 @@ print(obj)
### Integers with bounds
```python
from jambo import SchemaConverter
schema = {
"title": "AgeExample",
"type": "object",
@@ -109,6 +116,9 @@ print(obj)
### Nested Objects
```python
from jambo import SchemaConverter
schema = {
"title": "NestedObjectExample",
"type": "object",
@@ -130,6 +140,41 @@ obj = Model(address={"street": "Main St", "city": "Gotham"})
print(obj)
```
### References
```python
from jambo import SchemaConverter
schema = {
"title": "person",
"$ref": "#/$defs/person",
"$defs": {
"person": {
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "integer"},
"emergency_contact": {
"$ref": "#/$defs/person",
},
},
}
},
}
model = SchemaConverter.build(schema)
obj = model(
name="John",
age=30,
emergency_contact=model(
name="Jane",
age=28,
),
)
```
---
## 🧪 Running Tests
@@ -171,8 +216,6 @@ poe create-hooks
## 📌 Roadmap / TODO
- [ ] Support for `enum` and `const`
- [ ] Support for `anyOf`, `allOf`, `oneOf`
- [ ] Schema ref (`$ref`) resolution
- [ ] Better error reporting for unsupported schema types
---

View File

@@ -9,6 +9,7 @@ from .boolean_type_parser import BooleanTypeParser
from .float_type_parser import FloatTypeParser
from .int_type_parser import IntTypeParser
from .object_type_parser import ObjectTypeParser
from .ref_type_parser import RefTypeParser
from .string_type_parser import StringTypeParser
@@ -22,4 +23,5 @@ __all__ = [
"IntTypeParser",
"ObjectTypeParser",
"StringTypeParser",
"RefTypeParser",
]

View File

@@ -33,7 +33,7 @@ class GenericTypeParser(ABC, Generic[T]):
def from_properties(
self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions]
) -> tuple[type, dict]:
) -> tuple[T, dict]:
"""
Converts properties to a type and its fields properties.
:param name: The name of the type.

View File

@@ -1,8 +1,7 @@
from jambo.parser._type_parser import GenericTypeParser
from jambo.types.type_parser_options import TypeParserOptions
from pydantic import Field, create_model
from pydantic.main import ModelT
from pydantic import BaseModel, Field, create_model
from typing_extensions import Any, Unpack
@@ -13,7 +12,7 @@ class ObjectTypeParser(GenericTypeParser):
def from_properties_impl(
self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions]
):
) -> tuple[type[BaseModel], dict]:
type_parsing = self.to_model(
name,
properties.get("properties", {}),
@@ -29,13 +28,14 @@ class ObjectTypeParser(GenericTypeParser):
return type_parsing, type_properties
@classmethod
def to_model(
self,
cls,
name: str,
schema: dict[str, Any],
required_keys: list[str],
**kwargs: Unpack[TypeParserOptions],
) -> type[ModelT]:
) -> type[BaseModel]:
"""
Converts JSON Schema object properties to a Pydantic model.
:param name: The name of the model.
@@ -43,11 +43,12 @@ class ObjectTypeParser(GenericTypeParser):
:param required_keys: List of required keys in the schema.
:return: A Pydantic model class.
"""
fields = self._parse_properties(schema, required_keys, **kwargs)
fields = cls._parse_properties(schema, required_keys, **kwargs)
return create_model(name, **fields)
@staticmethod
@classmethod
def _parse_properties(
cls,
properties: dict[str, Any],
required_keys: list[str],
**kwargs: Unpack[TypeParserOptions],

View File

@@ -0,0 +1,125 @@
from jambo.parser import GenericTypeParser
from jambo.types.type_parser_options import TypeParserOptions
from typing_extensions import Any, ForwardRef, Literal, TypeVar, Union, Unpack
# Type variable bounded to the two kinds of value a reference can resolve to:
# a concrete type or a typing ForwardRef.
RefType = TypeVar("RefType", bound=Union[type, ForwardRef])
# Names of the two resolution strategies RefTypeParser dispatches on:
# "forward_ref" is used for the root reference "#", "def_ref" for "#/$defs/...".
RefStrategy = Literal["forward_ref", "def_ref"]
class RefTypeParser(GenericTypeParser):
json_schema_type = "$ref"
def from_properties_impl(
    self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions]
) -> tuple[RefType, dict]:
    """
    Resolves a `$ref` property into the type it points to.
    :param name: The name of the property being parsed (used in error messages).
    :param properties: The schema fragment that carries the `$ref` keyword.
    :param kwargs: Parser options; `context` and `ref_cache` are mandatory here.
    :return: The resolved type (or a ForwardRef when the target is still being
        processed) and the result of `mappings_properties_builder`.
    :raises ValueError: If `$ref` is missing or cannot be resolved.
    :raises RuntimeError: If `context` or `ref_cache` was not supplied.
    """
    if "$ref" not in properties:
        raise ValueError(f"RefTypeParser: Missing $ref in properties for {name}")

    if kwargs.get("context") is None:
        raise RuntimeError(
            f"RefTypeParser: Missing `context` in properties for {name}"
        )

    ref_cache = kwargs.get("ref_cache")
    if ref_cache is None:
        raise RuntimeError(
            f"RefTypeParser: Missing `ref_cache` in properties for {name}"
        )

    mapped_properties = self.mappings_properties_builder(properties, **kwargs)

    ref_strategy, ref_name, ref_property = self._examine_ref_strategy(
        name, properties, **kwargs
    )

    ref_state = self._get_ref_from_cache(ref_name, ref_cache)
    if ref_state is not None:
        # The reference is either being processed or already cached.
        return ref_state, mapped_properties

    try:
        resolved = self._parse_from_strategy(
            ref_strategy, ref_name, ref_property, **kwargs
        )
    except Exception:
        # The cache lookup above left a `None` "in progress" marker. Remove it
        # so a shared cache does not keep handing out ForwardRefs to a model
        # that was never built.
        if ref_cache.get(ref_name) is None:
            ref_cache.pop(ref_name, None)
        raise

    ref_cache[ref_name] = resolved
    return resolved, mapped_properties
def _parse_from_strategy(
    self,
    ref_strategy: RefStrategy,
    ref_name: str,
    ref_property: dict[str, Any],
    **kwargs: Unpack[TypeParserOptions],
):
    """
    Builds the type for a reference according to the chosen strategy.
    :param ref_strategy: Either "forward_ref" or "def_ref".
    :param ref_name: The name under which the reference is resolved and cached.
    :param ref_property: The schema fragment of the referenced definition.
    :param kwargs: Parser options forwarded to the delegated parser.
    :return: A ForwardRef for "forward_ref"; for "def_ref", the first element
        returned by `GenericTypeParser.type_from_properties`.
    :raises ValueError: If the strategy is not supported.
    """
    if ref_strategy == "forward_ref":
        return ForwardRef(ref_name)

    if ref_strategy == "def_ref":
        mapped_type, _ = GenericTypeParser.type_from_properties(
            ref_name, ref_property, **kwargs
        )
        return mapped_type

    # `ref_property` may not carry a `$ref` key (it can be an empty dict or a
    # resolved definition), so fall back to the reference name rather than
    # masking the ValueError with a KeyError.
    raise ValueError(
        f"RefTypeParser: Unsupported $ref {ref_property.get('$ref', ref_name)}"
    )
def _get_ref_from_cache(
self, ref_name: str, ref_cache: dict[str, type]
) -> RefType | type | None:
try:
ref_state = ref_cache[ref_name]
if ref_state is None:
# If the reference is being processed, we return a ForwardRef
return ForwardRef(ref_name)
# If the reference is already cached, we return it
return ref_state
except KeyError:
# If the reference is not in the cache, we will set it to None
ref_cache[ref_name] = None
def _examine_ref_strategy(
    self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions]
) -> tuple[RefStrategy, str, dict]:
    """
    Decides how a `$ref` value must be resolved.
    :param name: The name of the property being parsed.
    :param properties: The schema fragment that carries the `$ref` keyword.
    :param kwargs: Parser options; `context` is read to resolve the reference.
    :return: The strategy, the reference name and the referenced schema
        fragment (an empty dict for the root reference).
    :raises ValueError: If the root reference has no title or the reference
        is neither "#" nor under "#/$defs/".
    """
    ref = properties["$ref"]

    if ref == "#":
        ref_name = kwargs["context"].get("title")
        if ref_name is None:
            raise ValueError(
                "RefTypeParser: Missing title in properties for $ref of Root Reference"
            )
        return "forward_ref", ref_name, {}

    # The isinstance guard makes a non-string `$ref` fall through to the
    # ValueError below instead of failing with AttributeError on `startswith`.
    if isinstance(ref, str) and ref.startswith("#/$defs/"):
        target_name, target_property = self._extract_target_ref(
            name, properties, **kwargs
        )
        return "def_ref", target_name, target_property

    raise ValueError(
        "RefTypeParser: Only Root and $defs references are supported at the moment"
    )
def _extract_target_ref(
    self, name: str, properties: dict[str, Any], **kwargs: Unpack[TypeParserOptions]
) -> tuple[str, dict]:
    """
    Walks the `$ref` path through the context and returns its target.
    :param name: The name of the property being parsed (currently unused).
    :param properties: The schema fragment that carries the `$ref` keyword.
    :param kwargs: Parser options; `context` is the schema the path is walked in.
    :return: The last path segment and the schema fragment found there.
    :raises ValueError: If a path segment is missing or the target is empty.
    """
    ref = properties["$ref"]

    target_name = None
    target_property = kwargs["context"]

    # The first segment is the leading "#", so it is skipped.
    for prop_name in ref.split("/")[1:]:
        if prop_name not in target_property:
            raise ValueError(
                f"RefTypeParser: Missing {prop_name} in properties for $ref {ref}"
            )
        target_name = prop_name
        target_property = target_property[prop_name]

    if target_name is None or target_property is None:
        raise ValueError(f"RefTypeParser: Invalid $ref {ref}")

    return target_name, target_property

View File

@@ -1,9 +1,9 @@
from jambo.parser import ObjectTypeParser
from jambo.parser import ObjectTypeParser, RefTypeParser
from jambo.types.json_schema_type import JSONSchema
from jsonschema.exceptions import SchemaError
from jsonschema.validators import validator_for
from pydantic.main import ModelT
from pydantic import BaseModel
class SchemaConverter:
@@ -16,7 +16,7 @@ class SchemaConverter:
"""
@staticmethod
def build(schema: JSONSchema) -> type[ModelT]:
def build(schema: JSONSchema) -> type[BaseModel]:
"""
Converts a JSON Schema to a Pydantic model.
:param schema: The JSON Schema to convert.
@@ -32,11 +32,37 @@ class SchemaConverter:
if "title" not in schema:
raise ValueError("JSON Schema must have a title.")
if schema["type"] != "object":
raise TypeError(
f"Invalid JSON Schema: {schema['type']}. Only 'object' can be converted to Pydantic models."
)
schema_type = SchemaConverter._get_schema_type(schema)
return ObjectTypeParser().to_model(
schema["title"], schema.get("properties"), schema.get("required")
)
match schema_type:
case "object":
return ObjectTypeParser.to_model(
schema["title"],
schema["properties"],
schema.get("required", []),
context=schema,
ref_cache=dict(),
)
case "$ref":
parsed_model, _ = RefTypeParser().from_properties(
schema["title"],
schema,
context=schema,
ref_cache=dict(),
)
return parsed_model
case _:
raise TypeError(f"Unsupported schema type: {schema_type}")
@staticmethod
def _get_schema_type(schema: JSONSchema) -> str:
    """
    Determines which kind of schema is being converted.
    :param schema: The JSON Schema to inspect.
    :return: "$ref" when the schema carries that keyword, otherwise the value
        of its "type" key ("undefined" when the key is absent).
    """
    # A `$ref` takes precedence over whatever "type" may also be declared.
    return "$ref" if "$ref" in schema else schema.get("type", "undefined")

View File

@@ -1,5 +1,9 @@
from jambo.types.json_schema_type import JSONSchema
from typing_extensions import TypedDict
class TypeParserOptions(TypedDict):
    """Keyword options accepted by the type parsers' `from_properties` calls."""

    # NOTE(review): presumably tells the parser whether the property being
    # parsed is required — confirm against the parsers that read it.
    required: bool
    # Schema that RefTypeParser walks to resolve `$ref` paths and from which it
    # reads the root "title".
    context: JSONSchema
    # Cache keyed by reference name. RefTypeParser also stores `None` here
    # while a reference is still being processed.
    ref_cache: dict[str, type]

View File

@@ -0,0 +1,484 @@
from jambo.parser import ObjectTypeParser, RefTypeParser
from typing import ForwardRef
from unittest import TestCase
class TestRefTypeParser(TestCase):
def test_ref_type_parser_throws_without_ref(self):
    """A schema without the `$ref` keyword is rejected with ValueError."""
    schema = dict(
        title="person",
        type="object",
        properties=dict(name={"type": "string"}, age={"type": "integer"}),
        required=["name", "age"],
    )
    options = dict(context=schema, ref_cache={}, required=True)

    with self.assertRaises(ValueError):
        RefTypeParser().from_properties("person", schema, **options)
def test_ref_type_parser_throws_without_context(self):
    """Leaving out the `context` option raises RuntimeError."""
    person_def = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
        },
    }
    schema = {
        "title": "person",
        "$ref": "#/$defs/person",
        "$defs": {"person": person_def},
    }
    # Deliberately no `context` entry.
    options = dict(ref_cache={}, required=True)

    with self.assertRaises(RuntimeError):
        RefTypeParser().from_properties("person", schema, **options)
def test_ref_type_parser_throws_without_ref_cache(self):
    """Leaving out the `ref_cache` option raises RuntimeError."""
    person_def = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
        },
    }
    schema = {
        "title": "person",
        "$ref": "#/$defs/person",
        "$defs": {"person": person_def},
    }
    # Deliberately no `ref_cache` entry.
    options = dict(context=schema, required=True)

    with self.assertRaises(RuntimeError):
        RefTypeParser().from_properties("person", schema, **options)
def test_ref_type_parser_throws_if_network_ref_type(self):
    """A `$ref` pointing at a remote URL is rejected with ValueError."""
    schema = {
        "title": "person",
        "$ref": "https://example.com/schemas/person.json",
    }
    options = dict(context=schema, ref_cache={}, required=True)

    with self.assertRaises(ValueError):
        RefTypeParser().from_properties("person", schema, **options)
def test_ref_type_parser_throws_if_non_root_or_def_ref(self):
    """A `$ref` to something that is not in `$defs` raises ValueError."""
    # object3 points at "#/$defs/object2", yet object2 only exists as a sibling
    # property: the schema has no `$defs` section to resolve it from.
    inline_object = {
        "type": "object",
        "properties": {
            "attr1": {"type": "string"},
            "attr2": {"type": "integer"},
        },
    }
    schema = {
        "title": "object1",
        "type": "object",
        "properties": {
            "object2": inline_object,
            "object3": {"$ref": "#/$defs/object2"},
        },
    }
    options = dict(context=schema, ref_cache={}, required=True)

    with self.assertRaises(ValueError):
        ObjectTypeParser().from_properties("person", schema, **options)
def test_ref_type_parser_throws_if_def_doesnt_exists(self):
    """A `$ref` naming a definition absent from `$defs` raises ValueError."""
    schema = {"title": "person", "$ref": "#/$defs/employee", "$defs": {}}
    options = dict(context=schema, ref_cache={}, required=True)

    with self.assertRaises(ValueError):
        RefTypeParser().from_properties("person", schema, **options)
def test_ref_type_parser_throws_if_ref_property_doesnt_exists(self):
    """A definition whose value is `None` is rejected with ValueError."""
    schema = {
        "title": "person",
        "$ref": "#/$defs/person",
        "$defs": {"person": None},
    }
    options = dict(context=schema, ref_cache={}, required=True)

    with self.assertRaises(ValueError):
        RefTypeParser().from_properties("person", schema, **options)
def test_ref_type_parser_with_def(self):
    """A `$defs` reference resolves to a usable model class."""
    person_def = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
        },
    }
    schema = {
        "title": "person",
        "$ref": "#/$defs/person",
        "$defs": {"person": person_def},
    }

    person_cls, _ = RefTypeParser().from_properties(
        "person", schema, context=schema, ref_cache={}, required=True
    )
    self.assertIsInstance(person_cls, type)

    john = person_cls(name="John", age=30)
    self.assertEqual(john.name, "John")
    self.assertEqual(john.age, 30)
def test_ref_type_parser_with_forward_ref(self):
    """A root reference ("#") lets a model nest instances of itself."""
    schema = {
        "title": "person",
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
            "emergency_contact": {"$ref": "#"},
        },
        "required": ["name", "age"],
    }

    person_cls, _ = ObjectTypeParser().from_properties(
        "person", schema, context=schema, ref_cache={}, required=True
    )

    jane = person_cls(name="Jane", age=28)
    john = person_cls(name="John", age=30, emergency_contact=jane)

    self.assertEqual(john.name, "John")
    self.assertEqual(john.age, 30)
    self.assertIsInstance(john.emergency_contact, person_cls)
    self.assertEqual(john.emergency_contact.name, "Jane")
    self.assertEqual(john.emergency_contact.age, 28)
def test_ref_type_parser_invalid_forward_ref(self):
    """A root reference in a schema without a title raises ValueError."""
    # No "title" key on purpose: the root reference needs it to name itself.
    schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
            "emergency_contact": {"$ref": "#"},
        },
        "required": ["name", "age"],
    }
    options = dict(context=schema, ref_cache={}, required=True)

    with self.assertRaises(ValueError):
        ObjectTypeParser().from_properties("person", schema, **options)
def test_ref_type_parser_forward_ref_can_checks_validation(self):
    """Required fields are still enforced on a self-referencing model."""
    schema = {
        "title": "person",
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
            "emergency_contact": {"$ref": "#"},
        },
        "required": ["name", "age"],
    }

    person_cls, _ = ObjectTypeParser().from_properties(
        "person", schema, context=schema, ref_cache={}, required=True
    )

    # The nested instance omits the required `age`, so building it must fail
    # even though the field type was declared through a ForwardRef.
    with self.assertRaises(ValueError):
        person_cls(
            name="John",
            age=30,
            emergency_contact=person_cls(name="Jane"),
        )
def test_ref_type_parser_with_ciclic_def(self):
    """A definition that references itself resolves to a nestable model."""
    person_def = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
            "emergency_contact": {"$ref": "#/$defs/person"},
        },
    }
    schema = {
        "title": "person",
        "$ref": "#/$defs/person",
        "$defs": {"person": person_def},
    }

    person_cls, _ = RefTypeParser().from_properties(
        "person", schema, context=schema, ref_cache={}, required=True
    )

    jane = person_cls(name="Jane", age=28)
    john = person_cls(name="John", age=30, emergency_contact=jane)

    self.assertEqual(john.name, "John")
    self.assertEqual(john.age, 30)
    self.assertIsInstance(john.emergency_contact, person_cls)
    self.assertEqual(john.emergency_contact.name, "Jane")
    self.assertEqual(john.emergency_contact.age, 28)
def test_ref_type_parser_with_repeated_ref(self):
    """Every use of the same reference yields one and the same type."""
    self_ref = "#/$defs/person"
    person_def = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
            "emergency_contact": {"$ref": self_ref},
            "friends": {
                "type": "array",
                "items": {"$ref": self_ref},
            },
        },
    }
    schema = {
        "title": "person",
        "$ref": self_ref,
        "$defs": {"person": person_def},
    }

    person_cls, _ = RefTypeParser().from_properties(
        "person", schema, context=schema, ref_cache={}, required=True
    )

    john = person_cls(
        name="John",
        age=30,
        emergency_contact=person_cls(name="Jane", age=28),
        friends=[
            person_cls(name="Alice", age=25),
            person_cls(name="Bob", age=26),
        ],
    )

    self.assertEqual(
        type(john.emergency_contact),
        type(john.friends[0]),
        "Emergency contact and friends should be of the same type",
    )
def test_ref_type_parser_pre_computed_ref_cache(self):
    """Two parses sharing one cache return the very same model object."""
    shared_cache = {}
    parent_schema = {
        "$defs": {
            "person": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer"},
                },
            }
        },
    }

    resolved = []
    for title in ("person1", "person2"):
        ref_schema = {"title": title, "$ref": "#/$defs/person"}
        model, _ = RefTypeParser().from_properties(
            "person",
            ref_schema,
            context=parent_schema,
            ref_cache=shared_cache,
            required=True,
        )
        resolved.append(model)

    first, second = resolved
    self.assertIs(first, second, "Models should be the same instance")
def test_parse_from_strategy_invalid_ref_strategy(self):
    """An unknown strategy name makes `_parse_from_strategy` raise ValueError."""
    properties = {
        "title": "person",
        "$ref": "#/$defs/person",
        "$defs": {
            "person": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer"},
                },
            }
        },
    }

    # The call is expected to raise, so its result is not bound to anything.
    # `_parse_from_strategy` returns a single value, not a 3-tuple.
    with self.assertRaises(ValueError):
        RefTypeParser()._parse_from_strategy(
            "invalid_strategy",
            "person",
            properties,
        )
def test_parse_from_strategy_forward_ref(self):
    """The "forward_ref" strategy produces a ForwardRef."""
    person_def = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
        },
    }
    schema = {
        "title": "person",
        "$ref": "#/$defs/person",
        "$defs": {"person": person_def},
    }

    result = RefTypeParser()._parse_from_strategy("forward_ref", "person", schema)

    self.assertIsInstance(result, ForwardRef)
def test_parse_from_strategy_def_ref(self):
    """The "def_ref" strategy produces a model that can be instantiated."""
    person_def = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
        },
    }
    schema = {
        "title": "person",
        "$ref": "#/$defs/person",
        "$defs": {"person": person_def},
    }
    options = dict(context=schema, ref_cache={}, required=True)

    person_cls = RefTypeParser()._parse_from_strategy(
        "def_ref", "person", schema, **options
    )

    john = person_cls(name="John", age=30)
    self.assertEqual(john.name, "John")
    self.assertEqual(john.age, 30)

View File

@@ -496,3 +496,69 @@ class TestSchemaConverter(TestCase):
}
with self.assertRaises(ValueError):
SchemaConverter.build(schema)
def test_ref_with_root_ref(self):
    """`SchemaConverter.build` supports a property that references the root."""
    schema = {
        "title": "Person",
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
            "emergency_contact": {"$ref": "#"},
        },
        "required": ["name", "age"],
    }

    person_cls = SchemaConverter.build(schema)

    jane = person_cls(name="Jane", age=28)
    john = person_cls(name="John", age=30, emergency_contact=jane)

    self.assertEqual(john.name, "John")
    self.assertEqual(john.age, 30)
    self.assertIsInstance(john.emergency_contact, person_cls)
    self.assertEqual(john.emergency_contact.name, "Jane")
    self.assertEqual(john.emergency_contact.age, 28)
def test_ref_with_def(self):
    """`SchemaConverter.build` supports a top-level `$ref` into `$defs`."""
    person_def = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
            "emergency_contact": {"$ref": "#/$defs/person"},
        },
    }
    schema = {
        "title": "person",
        "$ref": "#/$defs/person",
        "$defs": {"person": person_def},
    }

    person_cls = SchemaConverter.build(schema)

    jane = person_cls(name="Jane", age=28)
    john = person_cls(name="John", age=30, emergency_contact=jane)

    self.assertEqual(john.name, "John")
    self.assertEqual(john.age, 30)
    self.assertIsInstance(john.emergency_contact, person_cls)
    self.assertEqual(john.emergency_contact.name, "Jane")
    self.assertEqual(john.emergency_contact.age, 28)