We are going to look into cloning Django model instances, the applicability of the Iterator and Visitor patterns, and a little bit into Django model metadata. If you want to see the final code, visit the GitHub repository: https://github.com/regqueryvalueex/django-relations-iterator
Lately, while working with some Django code, I faced the need to implement a proper cloning feature. A brief search on the internet turned up some packages for cloning, but I wasn’t satisfied with them. So it’s time to write some code myself.
The simplest clone in Django looks like this:
# models.py
class Meeting(models.Model):
    """Minimal model used to demonstrate the simplest possible clone."""

    title = models.CharField(max_length=200)


# clone
instance = Meeting.objects.last()
# Clearing the primary key makes the next save() insert a new row.
instance.pk = None
# Django's "copying model instances" recipe also flags the instance as new.
instance._state.adding = True
instance.save()
This will create a new row in the database with the same data as the original instance has. However, if you also want to clone some related objects, you have to manually go over all of them and clone them as well. Also, don’t forget to set the correct foreign key value, so that you end up with the correct hierarchy:
# models.py
class Meeting(models.Model):
    """A meeting that participations point to."""

    title = models.CharField(max_length=200)


class Participation(models.Model):
    """A row attached to a meeting through the ``meeting`` foreign key."""

    meeting = models.ForeignKey(
        'Meeting',
        on_delete=models.CASCADE,
        related_name='participations',
    )
# clone
def clone(instance):
    """Save ``instance`` as a brand-new database row and return it.

    The object is modified in place: its primary key is cleared and it is
    flagged as not yet stored, so ``save()`` inserts a new row. The returned
    object is the same Python object that was passed in.
    """
    instance.pk = None
    # Recommended by Django's "copying model instances" recipe: without it
    # the instance state still claims the row already exists.
    instance._state.adding = True
    instance.save()
    return instance
# Build the participations queryset before the meeting itself is cloned.
instance = Meeting.objects.last()
related_objects = instance.participations.all()
cloned_instance = clone(instance)
for participation in related_objects:
    # Re-point each participation at the cloned meeting, then clone it too.
    participation.meeting = cloned_instance
    clone(participation)
Well, it works, kind of. Let’s summarize the issues with this approach.
But what exactly do we want from this feature?
Let’s see what we can do…
from django.conf import settings
from django.db import models
class Meeting(models.Model):
    """A meeting; users are attached to it through ``Participation``."""

    title = models.CharField(max_length=200)
    time = models.DateTimeField(blank=True, null=True)
    participants = models.ManyToManyField(
        settings.AUTH_USER_MODEL,
        blank=True,
        through='Participation',
    )
class Participation(models.Model):
    """Through model joining a user to a meeting."""

    meeting = models.ForeignKey(
        'Meeting',
        related_name='participations',
        on_delete=models.CASCADE,
    )
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        related_name='participations',
        on_delete=models.CASCADE,
    )
class Invitation(models.Model):
    """An invitation that belongs to a single participation."""

    # The closing quote was missing in the original, which is a syntax error.
    status = models.CharField(max_length=20, default='sent')
    participation = models.ForeignKey(
        'Participation',
        on_delete=models.CASCADE,
        related_name='invitations'
    )
class Comment(models.Model):
    """A user's free-text comment on a meeting."""

    meeting = models.ForeignKey(
        'Meeting',
        related_name='comments',
        on_delete=models.CASCADE,
    )
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
    )
    description = models.TextField(max_length=3000)
Django models have an interface for model metadata, where we can read information about relations:
# Reverse relations are listed on the model's ``_meta`` options object.
meeting_relations = Meeting._meta.related_objects
print(meeting_relations)
# (<ManyToOneRel: meetings.participation>, <ManyToOneRel: meetings.comment>)
participation_relations = Participation._meta.related_objects
print(participation_relations)
# (<ManyToOneRel: meetings.invitation>,)
accessor_names = [rel.get_accessor_name() for rel in meeting_relations]
print(accessor_names)
# ['participations', 'comments']
The `get_accessor_name` method simply returns the name that can be used to access related objects.
We can also retrieve the corresponding relation with that name:
# The accessor name can be passed to ``get_field`` to get the relation back.
print(Meeting._meta.get_field('participations'))
# <ManyToOneRel: meetings.participation>
print(Meeting._meta.get_field('comments'))
# <ManyToOneRel: meetings.comment>
Good, so considering this we can create a simple configuration using relation names. Let’s say we want to include `Meeting`, `Participation`, and `Invitation`, but `Comment` must be excluded.
# Simple config: each key is a relation name, each value is the nested
# config for the related model.
structure = {
    'participations': {
        # No relations below invitations - empty dict
        'invitations': {},
    },
    # `comments` isn't here
}
# Let's create instances first
tom = User.objects.create(username='Tom')
jerry = User.objects.create(username='Jerry')
meeting = Meeting.objects.create(title='dinner')
tom_participation = Participation.objects.create(user_id=tom.id, meeting_id=meeting.id)
jerry_participation = Participation.objects.create(user_id=jerry.id, meeting_id=meeting.id)
# Invitation has no `user` or `meeting` field; it hangs off a participation.
# It is attached to the first participation, matching the tree shown below.
Invitation.objects.create(participation_id=tom_participation.id)
Comment.objects.create(user_id=jerry.id, meeting_id=meeting.id)
Now that we have some data, we can work with it. Considering the nature of the data, we can use recursion to iterate over our config and put the data obtained from the database into a dict. The result must look like a tree, so let’s create a couple of classes and build a basic tree:
import typing
from django.db.models import Model, ManyToManyRel, ManyToOneRel, OneToOneRel
# Any of the reverse relation objects the tree can follow.
Relation = typing.Union[ManyToManyRel, ManyToOneRel, OneToOneRel]
# node -> {relation -> subtree holding the related nodes}
RelationTree = typing.Dict[
    'TreeNode',
    typing.Dict[Relation, 'RelationTree'],
]
# relation name -> nested config for the related model
RelationTreeConfig = typing.Dict[str, 'RelationTreeConfig']
class TreeNode:
    """A single model instance placed in a relation tree.

    Attributes:
        instance: the wrapped model instance.
        parent: the node this one hangs under, ``None`` for the root.
        relation: the relation that links ``parent`` to this node, ``None``
            for the root.

    Nodes are used as dict keys. No ``__eq__`` is defined, so they compare by
    identity; the hash is computed once at construction so that a node stays
    findable in its dict even after the instance's primary key is changed
    (for example when the instance is re-saved as a clone).
    """

    def __init__(
        self,
        *,
        instance: 'Model',
        parent: typing.Optional['TreeNode'] = None,
        relation: typing.Optional['Relation'] = None
    ):
        self.instance = instance
        self.parent = parent
        self.relation = relation
        # ``pk`` works for every model, including ones whose primary key
        # field is not called ``id``. Frozen here on purpose: a hash that
        # follows a mutable pk would corrupt dicts keyed by this node.
        self._hash = hash(f'{str(self.model_class)}-{instance.pk}')

    @property
    def model_class(self):
        """Return the concrete class of the wrapped instance."""
        return type(self.instance)

    def __hash__(self):
        return self._hash

    def __repr__(self):
        return f'<{type(self).__name__} for {repr(self.instance).strip("<>")}>'
class ConfigurableRelationTree:
    """Relation tree for one root instance, shaped by a nested config.

    ``structure`` maps relation names (resolved on each model with
    ``_meta.get_field``) to the config of the related model. The tree is
    collected once, during construction, and stored in ``self.tree``.
    """

    def __init__(self, *, root: Model, structure: RelationTreeConfig):
        self.root: Model = root
        self.structure: RelationTreeConfig = structure
        self.tree: RelationTree = self.collect()

    def collect(
        self,
        *,
        root_node: typing.Optional[TreeNode] = None,
        structure: typing.Optional[RelationTreeConfig] = None
    ) -> RelationTree:
        """Build the subtree rooted at ``root_node``.

        With no arguments the whole tree is built from ``self.root`` and
        ``self.structure``. Returns a one-entry dict mapping the node to a
        dict of ``relation -> subtree of related nodes``.
        """
        current_node = root_node or self.get_node(instance=self.root)
        if structure is None:
            structure = self.structure
        current_instance = current_node.instance

        branches = {}
        for relation_name, nested_structure in structure.items():
            relation = current_instance._meta.get_field(relation_name)
            related = self._get_related_instances(
                instance=current_instance,
                relation=relation,
            )
            children = {}
            branches[relation] = children
            for related_instance in related:
                child_node = self.get_node(
                    instance=related_instance,
                    relation=relation,
                    parent=current_node,
                )
                # Recurse with the nested config and merge the child's subtree in.
                children.update(
                    self.collect(root_node=child_node, structure=nested_structure)
                )
        return {current_node: branches}

    def _get_related_instances(
        self,
        *,
        instance: Model,
        relation: Relation
    ) -> typing.List[Model]:
        """Return the instances reachable from ``instance`` via ``relation``."""
        name = relation.get_accessor_name()
        if not relation.one_to_one:
            return list(getattr(instance, name).all())
        # One-to-one: the accessor yields a single object, or is missing.
        related = getattr(instance, name, None)
        return [] if related is None else [related]

    def get_node(
        self,
        *,
        instance: Model,
        parent: typing.Optional[TreeNode] = None,
        relation: typing.Optional[Relation] = None
    ) -> TreeNode:
        """Wrap ``instance`` in a ``TreeNode`` with the given parent and relation."""
        return TreeNode(instance=instance, parent=parent, relation=relation)
Let’s see what we have:
from pprint import pprint

# The class defined above is ConfigurableRelationTree and it stores the
# collected data in its `tree` attribute.
tree = ConfigurableRelationTree(root=meeting, structure=structure)
pprint(tree.tree)
# {
#     <TreeNode for Meeting: Meeting object (1)>: {
#         <ManyToOneRel: meetings.participation>: {
#             <TreeNode for Participation: Participation object (1)>: {
#                 <ManyToOneRel: meetings.invitation>: {
#                     <TreeNode for Invitation: Invitation object (1)>: {}
#                 }
#             },
#             <TreeNode for Participation: Participation object (2)>: {
#                 <ManyToOneRel: meetings.invitation>: {}
#             }
#         }
#     }
# }
Good, it seems we have the correct structure. So how are we going to use it? This is where patterns come to our help.
Fortunately, programmers around the world have been working with data for many years and have many good solutions to solve almost any question. Our question is -
How to create reusable solution to iterate over our tree structure?
Answer is - Iterator pattern
Iterator is a behavioral design pattern that lets you traverse elements of a collection without exposing its underlying representation (list, stack, tree, etc.).
Our other question:
How to create a reusable solution to operate with tree nodes?
And the answer for that - Visitor pattern
Visitor is a behavioral design pattern that lets you separate algorithms from the objects on which they operate.
And what’s even better - these patterns work great together.
Python has a built-in iterator protocol, so we can simply use that. A simple implementation of the `__iter__` method that returns a generator will be enough.
import typing
from abc import ABC, abstractmethod
# An abstract base class is used here only to pin down the interface.
class AbstractRelationTreeIterator(ABC):
    """Interface for objects that can be iterated to walk a relation tree."""

    @abstractmethod
    def get_iterator(self, tree: typing.Optional[RelationTree] = None):
        """Return an iterator for ``tree``; implemented by subclasses."""

    def __iter__(self):
        # Plain ``for`` loops over the object delegate to ``get_iterator``.
        nodes = self.get_iterator()
        return nodes
class RelationTreeIterator(AbstractRelationTreeIterator):
    """Yields every node of a relation tree, each parent before its children."""

    def __init__(self, tree: ConfigurableRelationTree):
        self.tree = tree

    def get_iterator(self, tree: typing.Optional[RelationTree] = None):
        """Generate the nodes of ``tree`` (defaults to the whole wrapped tree)."""
        if tree is None:
            tree = self.tree.tree
        for node, relations in tree.items():
            yield node
            # The tree is nested, so walk each relation's subtree recursively.
            for children in relations.values():
                yield from self.get_iterator(children)
For visitor, it’s even simpler:
class AbstractVisitor(ABC):
    """Interface for operations applied to one tree node at a time."""

    @abstractmethod
    def visit(self, node: TreeNode):
        """Process a single ``node``; implemented by subclasses."""
class CloneVisitor(AbstractVisitor):
    """Visitor that re-saves every visited node's instance as a new row."""

    def visit(self, node: TreeNode):
        """Clone ``node.instance`` in place and attach it to its parent.

        The parent's current key values are copied onto the instance, so a
        parent has to be visited before its children for the clone to point
        at the cloned parent.
        """
        instance = node.instance
        instance.pk = None
        # Django's "copying model instances" recipe: flag the object as new
        # so its state no longer claims the row already exists.
        instance._state.adding = True
        if node.parent is not None:
            parent_instance = node.parent.instance
            # Each pair is (column on the parent, column on this instance).
            # Copy all of them, not just the first one.
            # NOTE(review): get_joining_columns() is believed to be deprecated
            # since Django 4.2 in favour of get_joining_fields() - confirm
            # against the Django version you target.
            for parent_column, instance_column in node.relation.get_joining_columns():
                setattr(instance, instance_column, getattr(parent_instance, parent_column))
        instance.save()
The `get_joining_columns` method returns the columns that are involved in the relation between models. For `Meeting` and `Participation` it will return `(('id', 'meeting_id'),)`, and that is exactly what we need, since `participation.meeting_id = meeting.id` is a correct foreign key assignment.
So, how do you use the visitor and the iterator together? Well, you just iterate over your data structure using the iterator and apply the visitor to every item:
tree = ConfigurableRelationTree(root=meeting, structure=structure)
visitor = CloneVisitor()
# Walk the tree and hand every node to the visitor.
nodes = RelationTreeIterator(tree)
for node in nodes:
    visitor.visit(node)
pprint(tree.tree)
# {
#     <TreeNode for Meeting: Meeting object (3)>: {
#         <ManyToOneRel: meetings.participation>: {
#             <TreeNode for Participation: Participation object (5)>: {
#                 <ManyToOneRel: meetings.invitation>: {
#                     <TreeNode for Invitation: Invitation object (3)>: {}
#                 }
#             },
#             <TreeNode for Participation: Participation object (6)>: {
#                 <ManyToOneRel: meetings.invitation>: {}
#             }
#         }
#     }
# }
As you can see, we have different ids here, so new instances were created in the database for this hierarchy.
It’s very common that you need to have control over the cloned data. For instance, in our case, we probably don’t want to have the same `time` in the cloned instance. And maybe we want to add some extra text to the meeting title, like `f'{original_title}-COPY'`.
The classical Visitor pattern suggests implementing a method in our `TreeNode` class that accepts a Visitor instance and calls the proper implementation. That is a good way to handle different requirements for different classes; however, I want to keep `ConfigurableRelationTree` and `TreeNode` untouched and rely on other tools. Fairly often, powerful language features allow us to simplify some classical patterns. For example, first-class functions can simplify the Strategy pattern, or in some cases even replace it.
For our case, we can use `functools.singledispatch`:
from functools import singledispatch
@singledispatch
def customize(instance):
    """Default hook: instances of unregistered types are left untouched."""
    return None
@customize.register
def _(instance: Meeting):
    """Meeting hook: drop the time and mark the title as a copy."""
    instance.time = None
    instance.title = f'{instance.title}-COPY'
# Changed CloneVisitor
class CloneVisitor(AbstractVisitor):
    """Visitor that customizes and re-saves each node's instance as a new row."""

    def visit(self, node: TreeNode):
        """Clone ``node.instance`` in place, customize it and attach it to its parent.

        The parent's current key values are copied onto the instance, so a
        parent has to be visited before its children for the clone to point
        at the cloned parent.
        """
        instance = node.instance
        instance.pk = None
        # Django's "copying model instances" recipe: flag the object as new
        # so its state no longer claims the row already exists.
        instance._state.adding = True
        if node.parent is not None:
            parent_instance = node.parent.instance
            # Each pair is (column on the parent, column on this instance).
            # Copy all of them, not just the first one.
            # NOTE(review): get_joining_columns() is believed to be deprecated
            # since Django 4.2 in favour of get_joining_fields() - confirm
            # against the Django version you target.
            for parent_column, instance_column in node.relation.get_joining_columns():
                setattr(instance, instance_column, getattr(parent_instance, parent_column))
        # added customize call
        customize(instance)
        instance.save()
Let’s see how it works now:
structure = {
    'participations': {
        'invitations': {}
    }
}
# `title` is not unique: the earlier, uncustomized clone left a second
# 'dinner' meeting behind, so get() would raise MultipleObjectsReturned.
# Take the one with the lowest pk, i.e. the meeting created first.
meeting = Meeting.objects.filter(title='dinner').order_by('pk').first()
tree = ConfigurableRelationTree(root=meeting, structure=structure)
visitor = CloneVisitor()
for node in RelationTreeIterator(tree):
    visitor.visit(node)
print(meeting.title)
# 'dinner-COPY'
print(meeting.time)
# None
Note that our tree is mutated: we actually overwrite the original tree with the cloned tree. That’s important, because if you want to use the original instances, you need to fetch them again after cloning.
Now we have a reusable approach to cloning instance hierarchies in Django. However, it’s not limited to the clone feature. We can use it for other needs, and most of the time all we need to do is create a new Visitor implementation and, maybe, a new Iterator implementation. Let’s create a quick example that prints the path to each of our nodes:
class PathPrintVisitor(AbstractVisitor):
    """Visitor that prints the chain of nodes from the root down to each node."""

    def visit(self, node: TreeNode):
        """Print the path to ``node`` as a list, root first."""
        leaf_to_root = self.get_path(node)
        print(list(reversed(leaf_to_root)))

    def get_path(self, node: TreeNode):
        """Return ``node`` followed by its ancestors, nearest parent first."""
        path = [node]
        ancestor = node.parent
        # Climb until a node without a parent (the root) has been added.
        while ancestor:
            path.append(ancestor)
            ancestor = ancestor.parent
        return path
visitor = PathPrintVisitor()
# Print one path per node, in the order the iterator yields them.
nodes = RelationTreeIterator(tree)
for node in nodes:
    visitor.visit(node)
# [<TreeNode for Meeting: Meeting object (5)>]
# [<TreeNode for Meeting: Meeting object (5)>, <TreeNode for Participation: Participation object (9)>]
# [<TreeNode for Meeting: Meeting object (5)>, <TreeNode for Participation: Participation object (9)>, <TreeNode for Invitation: Invitation object (5)>]
# [<TreeNode for Meeting: Meeting object (5)>, <TreeNode for Participation: Participation object (10)>]
You can visit my GitHub repo for this implementation and look into the code if you want to: https://github.com/regqueryvalueex/django-relations-iterator