Merge pull request #67 from HideyoshiNakazone/feature/cache-per-namespace
feat: adds caching per namespace
This commit was merged in pull request #67.
This commit is contained in:
@@ -62,13 +62,7 @@ There are two ways to build models with Jambo:
|
|||||||
1. The original static API: `SchemaConverter.build(schema)` doesn't persist any reference cache between calls and doesn't require any configuration.
|
1. The original static API: `SchemaConverter.build(schema)` doesn't persist any reference cache between calls and doesn't require any configuration.
|
||||||
2. The new instance API: use a `SchemaConverter()` instance and call `build_with_cache`, which exposes and persists a reference cache and helper methods.
|
2. The new instance API: use a `SchemaConverter()` instance and call `build_with_cache`, which exposes and persists a reference cache and helper methods.
|
||||||
|
|
||||||
The instance API is useful when you want to reuse generated subtypes, inspect cached models, or share caches between converters. See the docs for full details: https://jambo.readthedocs.io/en/latest/usage.ref_cache.html
|
The instance API is useful when you want to reuse generated subtypes, inspect cached models, or share caches between converters, with each cache isolated per namespace via the `$id` property in JSON Schema. See the docs for full details: https://jambo.readthedocs.io/en/latest/usage.ref_cache.html
|
||||||
|
|
||||||
|
|
||||||
> [!NOTE]
|
|
||||||
> The use of the instance API and ref cache can cause schema and type name collisions if not managed carefully, therefore
|
|
||||||
> it's recommended that each namespace or schema source uses its own `SchemaConverter` instance.
|
|
||||||
> If you don't need cache control, the static API is simpler and sufficient for most use cases.
|
|
||||||
|
|
||||||
|
|
||||||
### Static (compatibility) example
|
### Static (compatibility) example
|
||||||
|
|||||||
@@ -86,8 +86,32 @@ reference cache (a plain dict). Reusing the same converter instance across
|
|||||||
multiple calls will reuse that cache and therefore reuse previously generated
|
multiple calls will reuse that cache and therefore reuse previously generated
|
||||||
model classes.
|
model classes.
|
||||||
|
|
||||||
|
That cache is isolated per namespace via the `$id` property in JSON Schema, so
|
||||||
|
schemas with different `$id` values will not collide in the same cache.
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
|
from jambo import SchemaConverter
|
||||||
|
|
||||||
|
# no $id in this example, therefore a default namespace is used
|
||||||
|
schema = {
|
||||||
|
"title": "Person",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {"type": "string"},
|
||||||
|
"age": {"type": "integer"},
|
||||||
|
"address": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"street": {"type": "string"},
|
||||||
|
"city": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["street", "city"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["name", "address"],
|
||||||
|
}
|
||||||
|
|
||||||
converter = SchemaConverter() # has its own internal cache
|
converter = SchemaConverter() # has its own internal cache
|
||||||
|
|
||||||
model1 = converter.build_with_cache(schema)
|
model1 = converter.build_with_cache(schema)
|
||||||
@@ -96,6 +120,39 @@ model classes.
|
|||||||
# model1 and model2 are the same object because the instance cache persisted
|
# model1 and model2 are the same object because the instance cache persisted
|
||||||
assert model1 is model2
|
assert model1 is model2
|
||||||
|
|
||||||
|
When passing a schema with a different `$id`, the instance cache keeps types
|
||||||
|
separate:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
schema_a = {
|
||||||
|
"$id": "namespace_a",
|
||||||
|
"title": "Person",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["name"],
|
||||||
|
}
|
||||||
|
|
||||||
|
schema_b = {
|
||||||
|
"$id": "namespace_b",
|
||||||
|
"title": "Person",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["name"],
|
||||||
|
}
|
||||||
|
|
||||||
|
converter = SchemaConverter() # has its own internal cache
|
||||||
|
|
||||||
|
model_a = converter.build_with_cache(schema_a)
|
||||||
|
model_b = converter.build_with_cache(schema_b)
|
||||||
|
|
||||||
|
# different $id values isolate the types in the same cache
|
||||||
|
assert model_a is not model_b
|
||||||
|
|
||||||
If you want to temporarily avoid using the instance cache for a single call,
|
If you want to temporarily avoid using the instance cache for a single call,
|
||||||
use ``without_cache=True``. That causes :py:meth:`SchemaConverter.build_with_cache <jambo.SchemaConverter.build_with_cache>` to
|
use ``without_cache=True``. That causes :py:meth:`SchemaConverter.build_with_cache <jambo.SchemaConverter.build_with_cache>` to
|
||||||
use a fresh, empty cache for the duration of that call only:
|
use a fresh, empty cache for the duration of that call only:
|
||||||
@@ -118,7 +175,7 @@ instance cache.
|
|||||||
Retrieving cached types
|
Retrieving cached types
|
||||||
-----------------------
|
-----------------------
|
||||||
|
|
||||||
:py:meth:`SchemaConverter.get_cached_ref <jambo.SchemaConverter.get_cached_ref>`(name) — returns a cached model class or ``None``.
|
:py:meth:`SchemaConverter.get_cached_ref <jambo.SchemaConverter.get_cached_ref>`(name, namespace="default") — returns a cached model class or ``None``.
|
||||||
|
|
||||||
Retrieving the root type of the schema
|
Retrieving the root type of the schema
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
@@ -212,10 +269,62 @@ When retrieving a type defined in ``$defs``, access it directly by its name.
|
|||||||
cached_address_model = converter.get_cached_ref("address")
|
cached_address_model = converter.get_cached_ref("address")
|
||||||
|
|
||||||
|
|
||||||
|
Isolation by Namespace
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
The instance cache is isolated per namespace via the `$id` property in JSON Schema.
|
||||||
|
When retrieving a cached type, you can specify the namespace to look in
|
||||||
|
(via the ``namespace`` parameter). By default, the ``default`` namespace is used:
|
||||||
|
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from jambo import SchemaConverter
|
||||||
|
|
||||||
|
converter = SchemaConverter()
|
||||||
|
|
||||||
|
schema_a = {
|
||||||
|
"$id": "namespace_a",
|
||||||
|
"title": "Person",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["name"],
|
||||||
|
}
|
||||||
|
|
||||||
|
schema_b = {
|
||||||
|
"$id": "namespace_b",
|
||||||
|
"title": "Person",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["name"],
|
||||||
|
}
|
||||||
|
|
||||||
|
person_a = converter.build_with_cache(schema_a)
|
||||||
|
person_b = converter.build_with_cache(schema_b)
|
||||||
|
|
||||||
|
cached_person_a = converter.get_cached_ref("Person", namespace="namespace_a")
|
||||||
|
cached_person_b = converter.get_cached_ref("Person", namespace="namespace_b")
|
||||||
|
|
||||||
|
assert cached_person_a is person_a
|
||||||
|
assert cached_person_b is person_b
|
||||||
|
|
||||||
|
|
||||||
Clearing the cache
|
Clearing the cache
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
:py:meth:`SchemaConverter.clear_ref_cache <jambo.SchemaConverter.clear_ref_cache>`() — removes all entries from the instance cache.
|
:py:meth:`SchemaConverter.clear_ref_cache <jambo.SchemaConverter.clear_ref_cache>`(namespace: Optional[str]="default") — removes all entries from the given namespace of the instance cache, or from every namespace when ``None`` is passed.
|
||||||
|
|
||||||
|
|
||||||
|
When you want to clear the instance cache, use :py:meth:`SchemaConverter.clear_ref_cache <jambo.SchemaConverter.clear_ref_cache>`.
|
||||||
|
You can optionally specify a ``namespace`` to clear only that namespace;
|
||||||
|
otherwise, the default namespace is cleared.
|
||||||
|
|
||||||
|
If you want to clear all namespaces, call :py:meth:`SchemaConverter.clear_ref_cache <jambo.SchemaConverter.clear_ref_cache>` passing ``None`` as the namespace,
|
||||||
|
which removes all entries from all namespaces.
|
||||||
|
|
||||||
|
|
||||||
Notes and Behavioural Differences
|
Notes and Behavioural Differences
|
||||||
|
|||||||
@@ -99,8 +99,8 @@ the instance method persists and exposes the reference cache and provides helper
|
|||||||
|
|
||||||
.. warning::
|
.. warning::
|
||||||
The instance API with reference cache can lead to schema and type name collisions if not managed carefully.
|
The instance API with reference cache can lead to schema and type name collisions if not managed carefully.
|
||||||
It's recommended that each namespace or schema source uses its own `SchemaConverter` instance.
|
It's recommended that each schema defines its own unique namespace using the `$id` property in JSON Schema,
|
||||||
If you don't need cache control, the static API is simpler and sufficient for most use cases.
|
and that you access its ref cache by passing that namespace explicitly when needed.
|
||||||
|
|
||||||
For details and examples about the reference cache and the different cache modes (instance cache, per-call cache, ephemeral cache), see:
|
For details and examples about the reference cache and the different cache modes (instance cache, per-call cache, ephemeral cache), see:
|
||||||
|
|
||||||
|
|||||||
@@ -17,10 +17,12 @@ class SchemaConverter:
|
|||||||
fields and types. The generated model can be used for data validation and serialization.
|
fields and types. The generated model can be used for data validation and serialization.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, ref_cache: Optional[RefCacheDict] = None) -> None:
|
def __init__(
|
||||||
if ref_cache is None:
|
self, namespace_registry: Optional[dict[str, RefCacheDict]] = None
|
||||||
ref_cache = dict()
|
) -> None:
|
||||||
self._ref_cache = ref_cache
|
if namespace_registry is None:
|
||||||
|
namespace_registry = dict()
|
||||||
|
self._namespace_registry = namespace_registry
|
||||||
|
|
||||||
def build_with_cache(
|
def build_with_cache(
|
||||||
self,
|
self,
|
||||||
@@ -43,7 +45,8 @@ class SchemaConverter:
|
|||||||
if without_cache:
|
if without_cache:
|
||||||
local_ref_cache = dict()
|
local_ref_cache = dict()
|
||||||
elif ref_cache is None:
|
elif ref_cache is None:
|
||||||
local_ref_cache = self._ref_cache
|
namespace = schema.get("$id", "default")
|
||||||
|
local_ref_cache = self._namespace_registry.setdefault(namespace, dict())
|
||||||
else:
|
else:
|
||||||
local_ref_cache = ref_cache
|
local_ref_cache = ref_cache
|
||||||
|
|
||||||
@@ -107,19 +110,28 @@ class SchemaConverter:
|
|||||||
unsupported_field=unsupported_type,
|
unsupported_field=unsupported_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
def clear_ref_cache(self) -> None:
|
def clear_ref_cache(self, namespace: Optional[str] = "default") -> None:
|
||||||
"""
|
"""
|
||||||
Clears the reference cache.
|
Clears the reference cache.
|
||||||
"""
|
"""
|
||||||
self._ref_cache.clear()
|
if namespace is None:
|
||||||
|
self._namespace_registry.clear()
|
||||||
|
return
|
||||||
|
|
||||||
def get_cached_ref(self, ref_name: str):
|
if namespace in self._namespace_registry:
|
||||||
|
self._namespace_registry[namespace].clear()
|
||||||
|
|
||||||
|
def get_cached_ref(
|
||||||
|
self, ref_name: str, namespace: str = "default"
|
||||||
|
) -> Optional[type]:
|
||||||
"""
|
"""
|
||||||
Gets a cached reference from the reference cache.
|
Gets a cached reference from the reference cache.
|
||||||
:param ref_name: The name of the reference to get.
|
:param ref_name: The name of the reference to get.
|
||||||
:return: The cached reference, or None if not found.
|
:return: The cached reference, or None if not found.
|
||||||
"""
|
"""
|
||||||
cached_type = self._ref_cache.get(ref_name)
|
cached_type = self._namespace_registry.get(
|
||||||
|
namespace, {}
|
||||||
|
).get(ref_name)
|
||||||
|
|
||||||
if isinstance(cached_type, type):
|
if isinstance(cached_type, type):
|
||||||
return cached_type
|
return cached_type
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ class TestSchemaConverter(TestCase):
|
|||||||
self.converter = SchemaConverter()
|
self.converter = SchemaConverter()
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
self.converter.clear_ref_cache()
|
self.converter.clear_ref_cache(namespace=None)
|
||||||
|
|
||||||
def test_invalid_schema(self):
|
def test_invalid_schema(self):
|
||||||
schema = {
|
schema = {
|
||||||
@@ -877,7 +877,6 @@ class TestSchemaConverter(TestCase):
|
|||||||
converter2 = SchemaConverter(ref_cache)
|
converter2 = SchemaConverter(ref_cache)
|
||||||
model2 = converter2.build_with_cache(schema)
|
model2 = converter2.build_with_cache(schema)
|
||||||
|
|
||||||
self.assertIs(converter1._ref_cache, converter2._ref_cache)
|
|
||||||
self.assertIs(model1, model2)
|
self.assertIs(model1, model2)
|
||||||
|
|
||||||
def test_instance_level_ref_cache_isolation_via_without_cache_param(self):
|
def test_instance_level_ref_cache_isolation_via_without_cache_param(self):
|
||||||
@@ -1041,3 +1040,131 @@ class TestSchemaConverter(TestCase):
|
|||||||
|
|
||||||
with self.assertRaises(InvalidSchemaException):
|
with self.assertRaises(InvalidSchemaException):
|
||||||
self.converter.build_with_cache(schema)
|
self.converter.build_with_cache(schema)
|
||||||
|
|
||||||
|
def tests_instance_level_ref_cache_isolation_via_property_id(self):
|
||||||
|
schema1: JSONSchema = {
|
||||||
|
"$id": "http://example.com/schemas/person1.json",
|
||||||
|
"title": "Person",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {"type": "string"},
|
||||||
|
"age": {"type": "integer"},
|
||||||
|
"emergency_contact": {
|
||||||
|
"$ref": "#",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["name", "age"],
|
||||||
|
}
|
||||||
|
|
||||||
|
model1 = self.converter.build_with_cache(schema1)
|
||||||
|
|
||||||
|
schema2: JSONSchema = {
|
||||||
|
"$id": "http://example.com/schemas/person2.json",
|
||||||
|
"title": "Person",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {"type": "string"},
|
||||||
|
"age": {"type": "integer"},
|
||||||
|
"address": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["name", "age", "address"],
|
||||||
|
}
|
||||||
|
|
||||||
|
model2 = self.converter.build_with_cache(schema2)
|
||||||
|
|
||||||
|
self.assertIsNot(model1, model2)
|
||||||
|
|
||||||
|
def tests_instance_level_ref_cache_colision_when_same_property_id(self):
|
||||||
|
schema1: JSONSchema = {
|
||||||
|
"$id": "http://example.com/schemas/person.json",
|
||||||
|
"title": "Person",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {"type": "string"},
|
||||||
|
"age": {"type": "integer"},
|
||||||
|
"emergency_contact": {
|
||||||
|
"$ref": "#",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["name", "age"],
|
||||||
|
}
|
||||||
|
|
||||||
|
model1 = self.converter.build_with_cache(schema1)
|
||||||
|
|
||||||
|
schema2: JSONSchema = {
|
||||||
|
"$id": "http://example.com/schemas/person.json",
|
||||||
|
"title": "Person",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {"type": "string"},
|
||||||
|
"age": {"type": "integer"},
|
||||||
|
"address": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["name", "age", "address"],
|
||||||
|
}
|
||||||
|
|
||||||
|
model2 = self.converter.build_with_cache(schema2)
|
||||||
|
|
||||||
|
self.assertIs(model1, model2)
|
||||||
|
|
||||||
|
def test_namespace_isolation_via_on_call_config(self):
|
||||||
|
namespace = "namespace1"
|
||||||
|
|
||||||
|
schema: JSONSchema = {
|
||||||
|
"$id": namespace,
|
||||||
|
"title": "Person",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {"type": "string"},
|
||||||
|
"age": {"type": "integer"},
|
||||||
|
"address": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"street": {"type": "string"},
|
||||||
|
"city": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["street", "city"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["name", "age", "address"],
|
||||||
|
}
|
||||||
|
|
||||||
|
model = self.converter.build_with_cache(schema)
|
||||||
|
|
||||||
|
invalid_cached_model = self.converter.get_cached_ref("Person")
|
||||||
|
self.assertIsNone(invalid_cached_model)
|
||||||
|
|
||||||
|
cached_model = self.converter.get_cached_ref("Person", namespace=namespace)
|
||||||
|
self.assertIs(model, cached_model)
|
||||||
|
|
||||||
|
def test_clear_namespace_registry(self):
|
||||||
|
namespace = "namespace_to_clear"
|
||||||
|
|
||||||
|
schema: JSONSchema = {
|
||||||
|
"$id": namespace,
|
||||||
|
"title": "Person",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {"type": "string"},
|
||||||
|
"age": {"type": "integer"},
|
||||||
|
"address": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"street": {"type": "string"},
|
||||||
|
"city": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["street", "city"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["name", "age", "address"],
|
||||||
|
}
|
||||||
|
|
||||||
|
model = self.converter.build_with_cache(schema)
|
||||||
|
|
||||||
|
cached_model = self.converter.get_cached_ref("Person", namespace=namespace)
|
||||||
|
self.assertIs(model, cached_model)
|
||||||
|
|
||||||
|
self.converter.clear_ref_cache(namespace=namespace)
|
||||||
|
|
||||||
|
cleared_cached_model = self.converter.get_cached_ref("Person", namespace=namespace)
|
||||||
|
self.assertIsNone(cleared_cached_model)
|
||||||
|
|||||||
Reference in New Issue
Block a user