Skip to content

feat: ROOT-45: Filter by annotator prototype #7783

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 25 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions label_studio/data_manager/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
"""
import logging
import re
from collections import defaultdict
from datetime import datetime
from functools import reduce
from typing import ClassVar

import ujson as json
Expand Down Expand Up @@ -261,11 +263,15 @@ def apply_filters(queryset, filters, project, request):
if not filters:
return queryset

parent_id_to_filter_expressions: dict[int, list[Q]] = defaultdict(list)

# convert conjunction to orm statement
filter_expressions = []
custom_filter_expressions = load_func(settings.DATA_MANAGER_CUSTOM_FILTER_EXPRESSIONS)

for _filter in filters.items:
# combine child filters with their parent in the same filter expression
parent_id = _filter.parent if _filter.parent is not None else _filter.id
filter_expressions = parent_id_to_filter_expressions[parent_id]

# we can also have annotations filters
if not _filter.filter.startswith('filter:tasks:') or _filter.value is None:
Expand Down Expand Up @@ -458,11 +464,13 @@ def apply_filters(queryset, filters, project, request):
"""
if filters.conjunction == ConjunctionEnum.OR:
result_filter = Q()
for filter_expression in filter_expressions:
for filter_expressions in parent_id_to_filter_expressions.values():
filter_expression = reduce(lambda x, y: x & y, filter_expressions)
result_filter.add(filter_expression, Q.OR)
queryset = queryset.filter(result_filter)
else:
for filter_expression in filter_expressions:
for filter_expressions in parent_id_to_filter_expressions.values():
filter_expression = reduce(lambda x, y: x & y, filter_expressions)
queryset = queryset.filter(filter_expression)
return queryset

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 5.1.10 on 2025-06-17 18:27

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("data_manager", "0012_alter_view_user"),
]

operations = [
migrations.AddField(
model_name="filter",
name="parent",
field=models.ForeignKey(
blank=True,
help_text="Optional parent filter to create one-level hierarchy (child filters are AND-merged with parent)",
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="children",
to="data_manager.filter",
db_index=False,
),
),
migrations.AlterField(
model_name="filter",
name="index",
field=models.IntegerField(
blank=True,
default=None,
help_text="Display order among root filters only",
null=True,
verbose_name="index",
),
),
]
22 changes: 21 additions & 1 deletion label_studio/data_manager/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,12 @@ def get_prepare_tasks_params(self, add_selected_items=False):
for f in self.filter_group.filters.all():
items.append(
dict(
id=f.id,
filter=f.column,
operator=f.operator,
type=f.type,
value=f.value,
parent_id=f.parent.id if f.parent else None,
)
)
filters = dict(conjunction=self.filter_group.conjunction, items=items)
Expand All @@ -86,7 +88,25 @@ class FilterGroup(models.Model):


class Filter(models.Model):
index = models.IntegerField(_('index'), default=0, help_text='To keep filter order')
# Optional reference to a parent filter. We only allow **one** level of nesting.
parent = models.ForeignKey(
'self',
on_delete=models.CASCADE,
related_name='children',
null=True,
blank=True,
db_index=False,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this to satisfy the linter? While it's strictly necessary, I always like to have indices on FKs to support flexible querying

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep - I tried adding an index in a concurrent migration, but wasn't able to do it in a way that passed the migration linter

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's fine to suppress the migration linter like so:

- the index operation will be fast because this is a new column where all the new values will be NULL

help_text='Optional parent filter to create one-level hierarchy (child filters are AND-merged with parent)',
)

# `index` is now only meaningful for **root** filters (parent is NULL)
index = models.IntegerField(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a way we could name this field to make it clearer what it really represents? display_index or display_order_index would both help in this regard, imo

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This field isn't new, renaming it would have increased change surface area, but yes it's possible. Could do it in a separate chore PR

_('index'),
null=True,
blank=True,
default=None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit - as long as we are making other changes, we can remove this

Suggested change
default=None,

help_text='Display order among root filters only',
)
column = models.CharField(_('column'), max_length=1024, help_text='Field name')
type = models.CharField(_('type'), max_length=1024, help_text='Field type')
operator = models.CharField(_('operator'), max_length=1024, help_text='Filter operator')
Expand Down
4 changes: 4 additions & 0 deletions label_studio/data_manager/prepare_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ class FilterIn(BaseModel):


class Filter(BaseModel):
id: Optional[int] = None # Database ID of this filter
parent: Optional[int] = None # Parent filter DB ID (only for child filters)
children: Optional[List[int]] = None # Child filter DB IDs

filter: str
operator: str
type: str
Expand Down
124 changes: 102 additions & 22 deletions label_studio/data_manager/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,31 @@
from label_studio.core.utils.common import round_floats


class RecursiveField(serializers.Serializer):
def to_representation(self, value):
parent = self.parent.parent # the owning serializer instance
serializer = parent.__class__(value, context=self.context)
return serializer.data

def to_internal_value(self, data):
"""Allow RecursiveField to be writable.

We instantiate the *parent* serializer class (which in this case is
``FilterSerializer``) to validate the nested payload. The validated
data produced by that serializer is returned so that the enclosing
serializer (``FilterSerializer``) can include it in its own
``validated_data`` structure.
"""

parent_cls = self.parent.parent.__class__ # FilterSerializer
serializer = parent_cls(data=data, context=self.context)
serializer.is_valid(raise_exception=True)
return serializer.validated_data


class FilterSerializer(serializers.ModelSerializer):
child_filters = RecursiveField(many=True, required=False)

class Meta:
model = Filter
fields = '__all__'
Expand Down Expand Up @@ -114,15 +138,32 @@ def to_internal_value(self, data):
if 'filter_group' not in data and conjunction:
data['filter_group'] = {'conjunction': conjunction, 'filters': []}
if 'items' in filters:
# Support two input formats:
# 1) "flat" list where potential children reference their parent via ``parent``
# 2) "nested" list where each root item may contain ``child_filters``

def _convert_filter(src_filter):
"""Convert a single filter JSON object into internal representation."""

filter_payload = {
'column': src_filter.get('filter', ''),
'operator': src_filter.get('operator', ''),
'type': src_filter.get('type', ''),
'value': src_filter.get('value', {}),
}

# Explicit parent reference by DB id (rare in create requests)
if (parent := src_filter.get('parent')) is not None:
filter_payload['parent'] = parent

if child_filters := src_filter.get('child_filters'):
filter_payload['child_filters'] = [_convert_filter(child) for child in child_filters]

return filter_payload

# Iterate over top-level items (roots)
for f in filters['items']:
data['filter_group']['filters'].append(
{
'column': f.get('filter', ''),
'operator': f.get('operator', ''),
'type': f.get('type', ''),
'value': f.get('value', {}),
}
)
data['filter_group']['filters'].append(_convert_filter(f))

ordering = _data.pop('ordering', {})
data['ordering'] = ordering
Expand All @@ -137,15 +178,26 @@ def to_representation(self, instance):
filters.pop('filters', [])
filters.pop('id', None)

for f in instance.filter_group.filters.order_by('index'):
filters['items'].append(
{
'filter': f.column,
'operator': f.operator,
'type': f.type,
'value': f.value,
}
)
# Root filters first (ordered by index), followed by child filters (any order)
roots = instance.filter_group.filters.filter(parent__isnull=True).order_by('index')
children = instance.filter_group.filters.filter(parent__isnull=False)

for f in list(roots) + list(children):
item = {
'id': f.id,
'filter': f.column,
'operator': f.operator,
'type': f.type,
'value': f.value,
}

# Relationship hints
if f.parent:
item['parent'] = f.parent.id
if children := list(f.children.all().values_list('id', flat=True)):
item['children'] = children

filters['items'].append(item)
result['data']['filters'] = filters

selected_items = result.pop('selected_items', {})
Expand All @@ -159,11 +211,39 @@ def to_representation(self, instance):

@staticmethod
def _create_filters(filter_group, filters_data):
filter_index = 0
for filter_data in filters_data:
filter_data['index'] = filter_index
filter_group.filters.add(Filter.objects.create(**filter_data))
filter_index += 1
"""Create Filter objects inside the provided ``filter_group``.

* For **root** filters (``parent`` is ``None``) we enumerate the
``index`` so that the UI can preserve left-to-right order.
* For **child** filters we leave ``index`` as ``None`` – they are not
shown in the top-level ordering bar.
"""

def _create_recursive(data, parent=None, index=None):

# Extract nested children early (if any) and remove them from payload
children = data.pop('child_filters', [])

# Handle explicit parent reference present in the JSON payload only
# for root elements. For nested structures we rely on the actual
# ``parent`` FK object instead of its primary key.
if parent is not None:
data.pop('parent', None)

# Assign display order for root filters
if parent is None:
data['index'] = index

# Persist the filter
obj = Filter.objects.create(parent=parent, **data)
filter_group.filters.add(obj)

# Recurse into children (if any)
for child in children:
_create_recursive(child, parent=obj)

for index, data in enumerate(filters_data):
_create_recursive(data, index=index)

def create(self, validated_data):
with transaction.atomic():
Expand Down
10 changes: 8 additions & 2 deletions label_studio/tests/data_manager/test_views_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,10 @@ def test_views_api_filters(business_client, project_id):
)

assert response.status_code == 200, response.content
assert response.json()['data'] == payload['data']
response_data = response.json()['data']
for item in response_data['filters']['items']:
item.pop('id')
assert response_data == payload['data']

updated_payload = dict(
project=project_id,
Expand Down Expand Up @@ -187,7 +190,10 @@ def test_views_api_filters(business_client, project_id):
)

assert response.status_code == 200, response.content
assert response.json()['data'] == updated_payload['data']
response_data = response.json()['data']
for item in response_data['filters']['items']:
item.pop('id')
assert response_data == updated_payload['data']


def test_views_ordered_by_id(business_client, project_id):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ const GroupWrapper = ({ children, wrap = false }) => {
};

export const FilterLine = observer(({ filter, availableFilters, index, view, sidebar, dropdownClassName }) => {
const childFilters = filter.view.filters.filter((f) => f.parent === filter.id);

return (
<Block name="filter-line" tag={Fragment}>
<GroupWrapper wrap={sidebar}>
Expand Down Expand Up @@ -72,6 +74,24 @@ export const FilterLine = observer(({ filter, availableFilters, index, view, sid
</GroupWrapper>
<GroupWrapper wrap={sidebar}>
<FilterOperation filter={filter} value={filter.currentValue} operator={filter.operator} field={filter.field} />

{/* Render child filters (join filters) inline */}
{childFilters.map((child, idx) => {
console.debug("[DM] render child filter", { parent: filter, child });
return (
<Elem name="column" key={`child-${idx}`} mix="value">
<Tag size="small" className="filters-data-tag" color="#1d91e4" style={{ marginRight: 4 }}>
{child.field.title}
</Tag>
<FilterOperation
filter={child}
value={child.currentValue}
operator={child.operator}
field={child.field}
/>
</Elem>
);
})}
</GroupWrapper>
<Elem name="remove">
<Button
Expand Down
Loading
Loading