diff --git a/README.md b/README.md index 47c2040..535d4a1 100644 --- a/README.md +++ b/README.md @@ -62,13 +62,7 @@ There are two ways to build models with Jambo: 1. The original static API: `SchemaConverter.build(schema)` doesn't persist any reference cache between calls and doesn't require any configuration. 2. The new instance API: use a `SchemaConverter()` instance and call `build_with_cache`, which exposes and persists a reference cache and helper methods. -The instance API is useful when you want to reuse generated subtypes, inspect cached models, or share caches between converters. See the docs for full details: https://jambo.readthedocs.io/en/latest/usage.ref_cache.html - - -> [!NOTE] -> The use of the instance API and ref cache can cause schema and type name collisions if not managed carefully, therefore -> it's recommended that each namespace or schema source uses its own `SchemaConverter` instance. -> If you don't need cache control, the static API is simpler and sufficient for most use cases. +The instance API is useful when you want to reuse generated subtypes, inspect cached models, or share caches between converters, with cached types isolated per namespace via the `$id` property in JSON Schema. See the docs for full details: https://jambo.readthedocs.io/en/latest/usage.ref_cache.html ### Static (compatibility) example diff --git a/docs/source/usage.ref_cache.rst b/docs/source/usage.ref_cache.rst index 7fda88e..a54c7ce 100644 --- a/docs/source/usage.ref_cache.rst +++ b/docs/source/usage.ref_cache.rst @@ -86,8 +86,32 @@ reference cache (a plain dict). Reusing the same converter instance across multiple calls will reuse that cache and therefore reuse previously generated model classes. +That cache is isolated per namespace via the ``$id`` property in JSON Schema, so +schemas with different ``$id`` values will not collide in the same cache. + .. 
code-block:: python + from jambo import SchemaConverter + + # no $id in this example, therefore a default namespace is used + schema = { + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + }, + "required": ["street", "city"], + }, + }, + "required": ["name", "address"], + } + converter = SchemaConverter() # has its own internal cache model1 = converter.build_with_cache(schema) @@ -96,6 +120,39 @@ model classes. # model1 and model2 are the same object because the instance cache persisted assert model1 is model2 +When passing a schema with a different ``$id``, the instance cache keeps types +separate: + +.. code-block:: python + + schema_a = { + "$id": "namespace_a", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + }, + "required": ["name"], + } + + schema_b = { + "$id": "namespace_b", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + }, + "required": ["name"], + } + + converter = SchemaConverter() # has its own internal cache + + model_a = converter.build_with_cache(schema_a) + model_b = converter.build_with_cache(schema_b) + + # different $id values isolate the types in the same cache + assert model_a is not model_b + If you want to temporarily avoid using the instance cache for a single call, use ``without_cache=True``. That causes :py:meth:`SchemaConverter.build_with_cache ` to use a fresh, empty cache for the duration of that call only: @@ -118,7 +175,7 @@ instance cache. Retrieving cached types ----------------------- -:py:meth:`SchemaConverter.get_cached_ref `(name) — returns a cached model class or ``None``. +:py:meth:`SchemaConverter.get_cached_ref `(name, namespace="default") — returns the model class cached under ``name`` in the given namespace, or ``None``. 
Retrieving the root type of the schema ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -212,10 +269,62 @@ When retrieving a type defined in ``$defs``, access it directly by its name. cached_address_model = converter.get_cached_ref("address") +Isolation by Namespace +~~~~~~~~~~~~~~~~~~~~~~ + +The instance cache is isolated per namespace via the ``$id`` property in JSON Schema. +When retrieving a cached type, you can specify the namespace to look in +(via the ``namespace`` parameter). By default, the ``default`` namespace is used. + + +.. code-block:: python + + from jambo import SchemaConverter + + converter = SchemaConverter() + + schema_a = { + "$id": "namespace_a", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + }, + "required": ["name"], + } + + schema_b = { + "$id": "namespace_b", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + }, + "required": ["name"], + } + + person_a = converter.build_with_cache(schema_a) + person_b = converter.build_with_cache(schema_b) + + cached_person_a = converter.get_cached_ref("Person", namespace="namespace_a") + cached_person_b = converter.get_cached_ref("Person", namespace="namespace_b") + + assert cached_person_a is person_a + assert cached_person_b is person_b + + Clearing the cache ------------------ -:py:meth:`SchemaConverter.clear_ref_cache `() — removes all entries from the instance cache. +:py:meth:`SchemaConverter.clear_ref_cache `(namespace: Optional[str]="default") — removes all entries from one namespace of the instance cache (the ``default`` namespace when omitted), or from every namespace when ``None`` is passed. + + +When you want to clear the instance cache, use :py:meth:`SchemaConverter.clear_ref_cache `. +You can optionally specify a ``namespace`` to clear only that namespace; +otherwise, the default namespace is cleared. + +If you want to clear all namespaces, call :py:meth:`SchemaConverter.clear_ref_cache ` passing ``None`` as the namespace, +which removes all entries from all namespaces. 
Notes and Behavioural Differences diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 5db74f4..8c16fee 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -99,9 +99,9 @@ the instance method persists and exposes the reference cache and provides helper .. warning:: The instance API with reference cache can lead to schema and type name collisions if not managed carefully. - It's recommended that each namespace or schema source uses its own `SchemaConverter` instance. - If you don't need cache control, the static API is simpler and sufficient for most use cases. - + It's recommended that each schema defines its own unique namespace using the ``$id`` property in JSON Schema, + and that you access its ref cache by passing that namespace explicitly when needed. + For details and examples about the reference cache and the different cache modes (instance cache, per-call cache, ephemeral cache), see: .. toctree:: diff --git a/jambo/schema_converter.py b/jambo/schema_converter.py index 37a062d..0e92119 100644 --- a/jambo/schema_converter.py +++ b/jambo/schema_converter.py @@ -17,10 +17,12 @@ class SchemaConverter: fields and types. The generated model can be used for data validation and serialization. 
""" - def __init__(self, ref_cache: Optional[RefCacheDict] = None) -> None: - if ref_cache is None: - ref_cache = dict() - self._ref_cache = ref_cache + def __init__( + self, namespace_registry: Optional[dict[str, RefCacheDict]] = None + ) -> None: + if namespace_registry is None: + namespace_registry = dict() + self._namespace_registry = namespace_registry def build_with_cache( self, @@ -43,7 +45,8 @@ class SchemaConverter: if without_cache: local_ref_cache = dict() elif ref_cache is None: - local_ref_cache = self._ref_cache + namespace = schema.get("$id", "default") + local_ref_cache = self._namespace_registry.setdefault(namespace, dict()) else: local_ref_cache = ref_cache @@ -107,19 +110,28 @@ class SchemaConverter: unsupported_field=unsupported_type, ) - def clear_ref_cache(self) -> None: + def clear_ref_cache(self, namespace: Optional[str] = "default") -> None: """ Clears the reference cache. """ - self._ref_cache.clear() + if namespace is None: + self._namespace_registry.clear() + return - def get_cached_ref(self, ref_name: str): + if namespace in self._namespace_registry: + self._namespace_registry[namespace].clear() + + def get_cached_ref( + self, ref_name: str, namespace: str = "default" + ) -> Optional[type]: """ Gets a cached reference from the reference cache. :param ref_name: The name of the reference to get. :return: The cached reference, or None if not found. 
- """ - cached_type = self._ref_cache.get(ref_name) + """ + cached_type = self._namespace_registry.get( + namespace, {} + ).get(ref_name) if isinstance(cached_type, type): return cached_type diff --git a/tests/test_schema_converter.py b/tests/test_schema_converter.py index 450e441..dd2b32c 100644 --- a/tests/test_schema_converter.py +++ b/tests/test_schema_converter.py @@ -19,7 +19,7 @@ class TestSchemaConverter(TestCase): self.converter = SchemaConverter() def tearDown(self): - self.converter.clear_ref_cache() + self.converter.clear_ref_cache(namespace=None) def test_invalid_schema(self): schema = { @@ -877,7 +877,6 @@ class TestSchemaConverter(TestCase): converter2 = SchemaConverter(ref_cache) model2 = converter2.build_with_cache(schema) - self.assertIs(converter1._ref_cache, converter2._ref_cache) self.assertIs(model1, model2) def test_instance_level_ref_cache_isolation_via_without_cache_param(self): @@ -1041,3 +1040,131 @@ class TestSchemaConverter(TestCase): with self.assertRaises(InvalidSchemaException): self.converter.build_with_cache(schema) + + def tests_instance_level_ref_cache_isolation_via_property_id(self): + schema1: JSONSchema = { + "$id": "http://example.com/schemas/person1.json", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "emergency_contact": { + "$ref": "#", + }, + }, + "required": ["name", "age"], + } + + model1 = self.converter.build_with_cache(schema1) + + schema2: JSONSchema = { + "$id": "http://example.com/schemas/person2.json", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": {"type": "string"}, + }, + "required": ["name", "age", "address"], + } + + model2 = self.converter.build_with_cache(schema2) + + self.assertIsNot(model1, model2) + + def tests_instance_level_ref_cache_colision_when_same_property_id(self): + schema1: JSONSchema = { + "$id": 
"http://example.com/schemas/person.json", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "emergency_contact": { + "$ref": "#", + }, + }, + "required": ["name", "age"], + } + + model1 = self.converter.build_with_cache(schema1) + + schema2: JSONSchema = { + "$id": "http://example.com/schemas/person.json", + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": {"type": "string"}, + }, + "required": ["name", "age", "address"], + } + + model2 = self.converter.build_with_cache(schema2) + + self.assertIs(model1, model2) + + def test_namespace_isolation_via_on_call_config(self): + namespace = "namespace1" + + schema: JSONSchema = { + "$id": namespace, + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + }, + "required": ["street", "city"], + }, + }, + "required": ["name", "age", "address"], + } + + model = self.converter.build_with_cache(schema) + + invalid_cached_model = self.converter.get_cached_ref("Person") + self.assertIsNone(invalid_cached_model) + + cached_model = self.converter.get_cached_ref("Person", namespace=namespace) + self.assertIs(model, cached_model) + + def test_clear_namespace_registry(self): + namespace = "namespace_to_clear" + + schema: JSONSchema = { + "$id": namespace, + "title": "Person", + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + }, + "required": ["street", "city"], + }, + }, + "required": ["name", "age", "address"], + } + + model = self.converter.build_with_cache(schema) + + cached_model = self.converter.get_cached_ref("Person", namespace=namespace) + 
self.assertIs(model, cached_model) + + self.converter.clear_ref_cache(namespace=namespace) + + cleared_cached_model = self.converter.get_cached_ref("Person", namespace=namespace) + self.assertIsNone(cleared_cached_model)