diff --git a/docs/source/usage.ref_cache.rst b/docs/source/usage.ref_cache.rst new file mode 100644 index 0000000..adcf2f9 --- /dev/null +++ b/docs/source/usage.ref_cache.rst @@ -0,0 +1,233 @@ +=============== +Reference Cache +=============== + +The reference cache is named after the mechanism used to implement +the `$ref` keyword in the JSON Schema specification. + +Internally, the cache is used by both :py:meth:`SchemaConverter.build_with_cache ` +and :py:meth:`SchemaConverter.build `. +However, only :py:meth:`SchemaConverter.build_with_cache ` exposes the cache through a supported API; +:py:meth:`SchemaConverter.build ` uses the cache internally and does not provide access to it. + + +----------------------------------------- +Configuring and Using the Reference Cache +----------------------------------------- + +The reference cache can be used in three ways: + +* Without a persistent reference cache (no sharing between calls). +* Passing an explicit ``ref_cache`` dictionary to a call. +* Using the converter instance's default cache (the instance-level cache). + + +Usage Without Reference Cache +============================= + +When you run the library without a persistent reference cache, the generated +types are not stored for reuse. Each call to a build method creates fresh +Pydantic model classes (they will have different Python object identities). +Because nothing is cached, you cannot look up generated subtypes later. + +This is the default behaviour of :py:meth:`SchemaConverter.build `. +You can achieve the same behaviour with :py:meth:`SchemaConverter.build_with_cache ` by +passing ``without_cache=True``. + + +Usage: Manually Passing a Reference Cache +========================================= + +You can create and pass your own mutable mapping (typically a plain dict) +as the reference cache. This gives you full control over sharing and +lifetime of cached types. 
When two converters share the same dict, types +created by one converter will be reused by the other. + +.. code-block:: python + + from jambo import SchemaConverter + + # a shared cache you control + shared_cache = {} + + converter1 = SchemaConverter(shared_cache) + converter2 = SchemaConverter(shared_cache) + + model1 = converter1.build_with_cache(schema) + model2 = converter2.build_with_cache(schema) + + # Because both converters use the same cache object, the built models are the same object + assert model1 is model2 + +If you prefer a per-call cache (leaving the converter's instance cache unchanged), pass the ``ref_cache`` parameter to +:py:meth:`SchemaConverter.build_with_cache `: + +.. code-block:: python + + # pass an explicit, private cache for this call only + model_a = converter1.build_with_cache(schema, ref_cache={}) + model_b = converter1.build_with_cache(schema, ref_cache={}) + + # because each call received a fresh dict, the resulting model classes are distinct + assert model_a is not model_b + + +Usage: Using the Instance Default (Instance-level) Cache +======================================================= + +By default, a :class:`SchemaConverter` instance creates and keeps an internal +reference cache (a plain dict). Reusing the same converter instance across +multiple calls will reuse that cache and therefore reuse previously generated +model classes. + +.. code-block:: python + + converter = SchemaConverter() # has its own internal cache + + model1 = converter.build_with_cache(schema) + model2 = converter.build_with_cache(schema) + + # model1 and model2 are the same object because the instance cache persisted + assert model1 is model2 + +If you want to temporarily avoid using the instance cache for a single call, +use ``without_cache=True``. That causes :py:meth:`SchemaConverter.build_with_cache ` to +use a fresh, empty cache for the duration of that call only: + +.. 
code-block:: python + + model1 = converter.build_with_cache(schema, without_cache=True) + model2 = converter.build_with_cache(schema, without_cache=True) + + # each call used a fresh cache, so the models are distinct + assert model1 is not model2 + + +Inspecting and Managing the Cache +================================= + +The converter provides a small, explicit API to inspect and manage the +instance cache. + +Retrieving cached types +----------------------- + +:py:meth:`SchemaConverter.get_cached_ref `(name) — returns a cached model class or ``None``. + +Retrieving the root type of the schema +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When retrieving the root type of a schema, pass the schema's ``title`` property as the name. + +.. code-block:: python + + from jambo import SchemaConverter + + converter = SchemaConverter() + + schema = { + "title": "person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + } + + person_model = converter.build_with_cache(schema) + cached_person_model = converter.get_cached_ref("person") + + +Retrieving a subtype +~~~~~~~~~~~~~~~~~~~~ + +When retrieving a subtype, pass a path string (for example, ``parent_name.field_name``) as the name. + +.. code-block:: python + + from jambo import SchemaConverter + + converter = SchemaConverter() + + schema = { + "title": "person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + }, + "required": ["street", "city"], + }, + } + } + + person_model = converter.build_with_cache(schema) + cached_address_model = converter.get_cached_ref("person.address") + + + +Retrieving a type from ``$defs`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When retrieving a type defined in ``$defs``, access it directly by its name. + +.. 
code-block:: python + + from jambo import SchemaConverter + + converter = SchemaConverter() + + schema = { + "title": "person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": {"$ref": "#/$defs/address"}, + }, + "$defs": { + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + }, + "required": ["street", "city"], + } + }, + } + + person_model = converter.build_with_cache(schema) + cached_address_model = converter.get_cached_ref("address") + + +Clearing the cache +------------------ + +:py:meth:`SchemaConverter.clear_ref_cache `() — removes all entries from the instance cache. + + +Notes and Behavioural Differences +================================= + +* :py:meth:`SchemaConverter.build ` does not expose or persist an instance cache. If you call it without + providing a ``ref_cache`` it will create and use a temporary cache for that + call only; nothing from that call will be available later via + :py:meth:`SchemaConverter.get_cached_ref `. + +* :py:meth:`SchemaConverter.build_with_cache ` is the supported entry point when you want + cache control: it uses the instance cache by default, accepts an explicit + ``ref_cache`` dict for per-call control, or uses ``without_cache=True`` to + run with an ephemeral cache. + + +References in the Test Suite +============================ + +These behaviours are exercised in the project's tests; see :mod:`tests.test_schema_converter` +for examples and additional usage notes. diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 3bdb2d9..5a7dc08 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -1,9 +1,15 @@ +=================== Using Jambo =================== -Jambo is designed to be easy to use, it doesn't require any complex setup or configuration. -Below a example of how to use Jambo to convert a JSON Schema into a Pydantic model. +Jambo is designed to be easy to use. 
It doesn't require complex setup or configuration when not needed, while providing more powerful instance methods when you do need control. +Below is an example of how to use Jambo to convert a JSON Schema into a Pydantic model. + + +------------------------- +Static Method (no config) +------------------------- .. code-block:: python @@ -15,8 +21,16 @@ Below a example of how to use Jambo to convert a JSON Schema into a Pydantic mod "properties": { "name": {"type": "string"}, "age": {"type": "integer"}, + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + }, + "required": ["street", "city"], + }, }, - "required": ["name"], + "required": ["name", "address"], } Person = SchemaConverter.build(schema) @@ -26,16 +40,76 @@ Below a example of how to use Jambo to convert a JSON Schema into a Pydantic mod # Output: Person(name='Alice', age=30) -The :py:meth:`SchemaConverter.build ` static method takes a JSON Schema dictionary and returns a Pydantic model class. You can then instantiate this class with the required fields, and it will automatically validate the data according to the schema. +The :py:meth:`SchemaConverter.build ` static method takes a JSON Schema dictionary and returns a Pydantic model class. -If passed a description inside the schema it will also add it to the Pydantic model using the `description` field. This is useful for AI Frameworks as: LangChain, CrewAI and others, as they use this description for passing context to LLMs. +Note: the static ``build`` method was the original public API of this library and is kept for backwards compatibility. It creates and returns a model class for the provided schema but does not expose or persist an instance cache. -For more complex schemas and types see our documentation on +-------------------------------- +Instance Method (with ref cache) +-------------------------------- + +.. 
code-block:: python + + from jambo import SchemaConverter + + converter = SchemaConverter() + + schema = { + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + }, + "required": ["street", "city"], + }, + }, + "required": ["name", "address"], + } + + # The instance API (build_with_cache) populates the converter's instance-level reference cache + Person = converter.build_with_cache(schema) + + obj = Person(name="Alice", age=30, address={"street": "123 Main St", "city": "Springfield"}) + print(obj) + # Output: Person(name='Alice', age=30, address=...) + + # When using the converter's built-in instance cache (no ref_cache passed to the call), + # all object types parsed during the build are stored and can be retrieved via get_cached_ref. + + cached_person_model = converter.get_cached_ref("Person") + assert Person is cached_person_model # the cached class is the same object that was built + + # A nested/subobject type can also be retrieved from the instance cache + cached_address_model = converter.get_cached_ref("Person.address") + + +The :py:meth:`SchemaConverter.build_with_cache ` instance method was added after the +initial static API to make it easier to access and reuse subtypes defined in a schema. +Unlike the original static :py:meth:`SchemaConverter.build `, +the instance method persists and exposes the reference cache and provides helpers such as +:py:meth:`SchemaConverter.get_cached_ref ` and +:py:meth:`SchemaConverter.clear_ref_cache `. + +For details and examples about the reference cache and the different cache modes (instance cache, per-call cache, ephemeral cache), see: + +.. toctree:: + usage.ref_cache + + +Type System +----------- + +For a full explanation of the supported schemas and types see our documentation on types: .. 
toctree:: :maxdepth: 2 - :caption: Contents: usage.string usage.numeric diff --git a/tests/test_schema_converter.py b/tests/test_schema_converter.py index 245ca8e..018c412 100644 --- a/tests/test_schema_converter.py +++ b/tests/test_schema_converter.py @@ -33,7 +33,7 @@ class TestSchemaConverter(TestCase): } with self.assertRaises(InvalidSchemaException): - self.converter.build_with_instance(schema) + self.converter.build_with_cache(schema) def test_invalid_schema_type(self): schema = { @@ -47,7 +47,7 @@ class TestSchemaConverter(TestCase): } with self.assertRaises(InvalidSchemaException): - self.converter.build_with_instance(schema) + self.converter.build_with_cache(schema) def test_build_expects_title(self): schema = { @@ -60,7 +60,7 @@ class TestSchemaConverter(TestCase): } with self.assertRaises(InvalidSchemaException): - self.converter.build_with_instance(schema) + self.converter.build_with_cache(schema) def test_build_expects_object(self): schema = { @@ -70,7 +70,7 @@ class TestSchemaConverter(TestCase): } with self.assertRaises(UnsupportedSchemaException): - self.converter.build_with_instance(schema) + self.converter.build_with_cache(schema) def test_is_invalid_field(self): schema = { @@ -86,7 +86,7 @@ class TestSchemaConverter(TestCase): } with self.assertRaises(InvalidSchemaException) as context: - self.converter.build_with_instance(schema) + self.converter.build_with_cache(schema) self.assertTrue("Unknown type" in str(context.exception)) def test_jsonschema_to_pydantic(self): @@ -101,7 +101,7 @@ class TestSchemaConverter(TestCase): "required": ["name"], } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertTrue(is_pydantic_model(model)) @@ -122,7 +122,7 @@ class TestSchemaConverter(TestCase): "required": ["name"], } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertEqual(model(name="John", age=30).name, "John") @@ -153,7 +153,7 @@ 
class TestSchemaConverter(TestCase): "required": ["age"], } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertEqual(model(age=30).age, 30) @@ -178,7 +178,7 @@ class TestSchemaConverter(TestCase): "required": ["age"], } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertEqual(model(age=30).age, 30.0) @@ -199,7 +199,7 @@ class TestSchemaConverter(TestCase): "required": ["is_active"], } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertEqual(model(is_active=True).is_active, True) @@ -222,7 +222,7 @@ class TestSchemaConverter(TestCase): "required": ["friends"], } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertEqual( model(friends=["John", "Jane", "John"]).friends, {"John", "Jane"} @@ -235,7 +235,7 @@ class TestSchemaConverter(TestCase): model(friends=["John", "Jane", "Invalid"]) def test_validation_list_with_missing_items(self): - model = self.converter.build_with_instance( + model = self.converter.build_with_cache( { "title": "Person", "description": "A person", @@ -273,7 +273,7 @@ class TestSchemaConverter(TestCase): "required": ["address"], } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) obj = model(address={"street": "123 Main St", "city": "Springfield"}) @@ -297,7 +297,7 @@ class TestSchemaConverter(TestCase): "required": ["name"], } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) obj = model(name="John") @@ -320,7 +320,7 @@ class TestSchemaConverter(TestCase): } with self.assertRaises(InvalidSchemaException): - self.converter.build_with_instance(schema_max_length) + self.converter.build_with_cache(schema_max_length) def test_default_for_list(self): schema_list = { @@ -337,7 +337,7 @@ class 
TestSchemaConverter(TestCase): "required": ["friends"], } - model_list = self.converter.build_with_instance(schema_list) + model_list = self.converter.build_with_cache(schema_list) self.assertEqual(model_list().friends, ["John", "Jane"]) @@ -358,7 +358,7 @@ class TestSchemaConverter(TestCase): "required": ["friends"], } - model_set = self.converter.build_with_instance(schema_set) + model_set = self.converter.build_with_cache(schema_set) self.assertEqual(model_set().friends, {"John", "Jane"}) @@ -380,7 +380,7 @@ class TestSchemaConverter(TestCase): "required": ["address"], } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) obj = model(address={"street": "123 Main St", "city": "Springfield"}) @@ -404,7 +404,7 @@ class TestSchemaConverter(TestCase): }, } - Model = self.converter.build_with_instance(schema) + Model = self.converter.build_with_cache(schema) obj = Model( name="J", @@ -433,7 +433,7 @@ class TestSchemaConverter(TestCase): }, } - Model = self.converter.build_with_instance(schema) + Model = self.converter.build_with_cache(schema) obj = Model(id=1) self.assertEqual(obj.id, 1) @@ -457,7 +457,7 @@ class TestSchemaConverter(TestCase): "properties": {"email": {"type": "string", "format": "email"}}, } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertEqual(model(email="test@example.com").email, "test@example.com") with self.assertRaises(ValidationError): @@ -470,7 +470,7 @@ class TestSchemaConverter(TestCase): "properties": {"website": {"type": "string", "format": "uri"}}, } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertEqual( model(website="https://example.com").website, AnyUrl("https://example.com") ) @@ -485,7 +485,7 @@ class TestSchemaConverter(TestCase): "properties": {"ip": {"type": "string", "format": "ipv4"}}, } - model = self.converter.build_with_instance(schema) + model = 
self.converter.build_with_cache(schema) self.assertEqual(model(ip="192.168.1.1").ip, IPv4Address("192.168.1.1")) with self.assertRaises(ValidationError): @@ -498,7 +498,7 @@ class TestSchemaConverter(TestCase): "properties": {"ip": {"type": "string", "format": "ipv6"}}, } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertEqual( model(ip="2001:0db8:85a3:0000:0000:8a2e:0370:7334").ip, IPv6Address("2001:0db8:85a3:0000:0000:8a2e:0370:7334"), @@ -514,7 +514,7 @@ class TestSchemaConverter(TestCase): "properties": {"id": {"type": "string", "format": "uuid"}}, } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertEqual( model(id="123e4567-e89b-12d3-a456-426614174000").id, @@ -531,7 +531,7 @@ class TestSchemaConverter(TestCase): "properties": {"hostname": {"type": "string", "format": "hostname"}}, } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertEqual(model(hostname="example.com").hostname, "example.com") with self.assertRaises(ValidationError): @@ -544,7 +544,7 @@ class TestSchemaConverter(TestCase): "properties": {"timestamp": {"type": "string", "format": "date-time"}}, } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertEqual( model(timestamp="2024-01-01T12:00:00Z").timestamp.isoformat(), "2024-01-01T12:00:00+00:00", @@ -560,7 +560,7 @@ class TestSchemaConverter(TestCase): "properties": {"time": {"type": "string", "format": "time"}}, } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) self.assertEqual( model(time="20:20:39+00:00").time.isoformat(), "20:20:39+00:00" ) @@ -576,7 +576,7 @@ class TestSchemaConverter(TestCase): } with self.assertRaises(InvalidSchemaException): - self.converter.build_with_instance(schema) + self.converter.build_with_cache(schema) def 
test_ref_with_root_ref(self): schema = { @@ -592,7 +592,7 @@ class TestSchemaConverter(TestCase): "required": ["name", "age"], } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) obj = model( name="John", @@ -627,7 +627,7 @@ class TestSchemaConverter(TestCase): }, } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) obj = model( name="John", @@ -666,7 +666,7 @@ class TestSchemaConverter(TestCase): }, } - Model = self.converter.build_with_instance(schema) + Model = self.converter.build_with_cache(schema) obj = Model( name="John", @@ -692,7 +692,7 @@ class TestSchemaConverter(TestCase): "required": ["status"], } - Model = self.converter.build_with_instance(schema) + Model = self.converter.build_with_cache(schema) obj = Model(status="active") self.assertEqual(obj.status.value, "active") @@ -711,7 +711,7 @@ class TestSchemaConverter(TestCase): "required": ["status"], } - Model = self.converter.build_with_instance(schema) + Model = self.converter.build_with_cache(schema) obj = Model() self.assertEqual(obj.status.value, "active") @@ -728,7 +728,7 @@ class TestSchemaConverter(TestCase): "required": ["name"], } - Model = self.converter.build_with_instance(schema) + Model = self.converter.build_with_cache(schema) obj = Model() self.assertEqual(obj.name, "United States of America") @@ -751,7 +751,7 @@ class TestSchemaConverter(TestCase): "required": ["name"], } - Model = self.converter.build_with_instance(schema) + Model = self.converter.build_with_cache(schema) obj = Model() self.assertEqual(obj.name, ["Brazil"]) @@ -771,7 +771,7 @@ class TestSchemaConverter(TestCase): }, } - Model = self.converter.build_with_instance(schema) + Model = self.converter.build_with_cache(schema) obj = Model() self.assertIsNone(obj.a_thing) @@ -813,7 +813,7 @@ class TestSchemaConverter(TestCase): }, } - schema_type = self.converter.build_with_instance(schema) + schema_type = 
self.converter.build_with_cache(schema) # check for me that the types generated by the oneOf in the typing.Annotated have different names operating_system_field = schema_type.model_fields["operating_system"] @@ -827,7 +827,7 @@ class TestSchemaConverter(TestCase): def test_object_invalid_require(self): # https://github.com/HideyoshiNakazone/jambo/issues/60 - object_ = self.converter.build_with_instance( + object_ = self.converter.build_with_cache( { "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "TEST", @@ -872,10 +872,10 @@ class TestSchemaConverter(TestCase): } converter1 = SchemaConverter(ref_cache) - model1 = converter1.build_with_instance(schema) + model1 = converter1.build_with_cache(schema) converter2 = SchemaConverter(ref_cache) - model2 = converter2.build_with_instance(schema) + model2 = converter2.build_with_cache(schema) self.assertIs(converter1._ref_cache, converter2._ref_cache) self.assertIs(model1, model2) @@ -894,8 +894,8 @@ class TestSchemaConverter(TestCase): "required": ["name", "age"], } - model1 = self.converter.build_with_instance(schema, without_cache=True) - model2 = self.converter.build_with_instance(schema, without_cache=True) + model1 = self.converter.build_with_cache(schema, without_cache=True) + model2 = self.converter.build_with_cache(schema, without_cache=True) self.assertIsNot(model1, model2) @@ -913,8 +913,8 @@ class TestSchemaConverter(TestCase): "required": ["name", "age"], } - model1 = self.converter.build_with_instance(schema, ref_cache={}) - model2 = self.converter.build_with_instance(schema, ref_cache={}) + model1 = self.converter.build_with_cache(schema, ref_cache={}) + model2 = self.converter.build_with_cache(schema, ref_cache={}) self.assertIsNot(model1, model2) @@ -932,7 +932,7 @@ class TestSchemaConverter(TestCase): "required": ["name", "age"], } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) cached_model = self.converter.get_cached_ref("Person") 
@@ -962,7 +962,7 @@ class TestSchemaConverter(TestCase): "required": ["name", "age", "address"], } - model = self.converter.build_with_instance(schema) + model = self.converter.build_with_cache(schema) cached_model = self.converter.get_cached_ref("Person.address") @@ -990,7 +990,7 @@ class TestSchemaConverter(TestCase): }, } - person_model = self.converter.build_with_instance(schema) + person_model = self.converter.build_with_cache(schema) cached_person_model = self.converter.get_cached_ref("person") self.assertIs(person_model, cached_person_model)