Skip to content

feat: ROOT-45: Filter by annotator prototype #7783

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 25 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions label_studio/data_manager/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
"""
import logging
import re
from collections import defaultdict
from datetime import datetime
from functools import reduce
from typing import ClassVar

import ujson as json
Expand Down Expand Up @@ -261,11 +263,15 @@ def apply_filters(queryset, filters, project, request):
if not filters:
return queryset

parent_id_to_filter_expressions: dict[int, list[Q]] = defaultdict(list)

# convert conjunction to orm statement
filter_expressions = []
custom_filter_expressions = load_func(settings.DATA_MANAGER_CUSTOM_FILTER_EXPRESSIONS)

for _filter in filters.items:
# combine child filters with their parent in the same filter expression
parent_id = _filter.parent_id if _filter.parent_id is not None else _filter.id
filter_expressions = parent_id_to_filter_expressions[parent_id]

# we can also have annotations filters
if not _filter.filter.startswith('filter:tasks:') or _filter.value is None:
Expand Down Expand Up @@ -458,11 +464,13 @@ def apply_filters(queryset, filters, project, request):
"""
if filters.conjunction == ConjunctionEnum.OR:
result_filter = Q()
for filter_expression in filter_expressions:
for filter_expressions in parent_id_to_filter_expressions.values():
filter_expression = reduce(lambda x, y: x & y, filter_expressions)
result_filter.add(filter_expression, Q.OR)
queryset = queryset.filter(result_filter)
else:
for filter_expression in filter_expressions:
for filter_expressions in parent_id_to_filter_expressions.values():
filter_expression = reduce(lambda x, y: x & y, filter_expressions)
queryset = queryset.filter(filter_expression)
return queryset

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 5.1.10 on 2025-06-17 18:27

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("data_manager", "0012_alter_view_user"),
]

operations = [
migrations.AddField(
model_name="filter",
name="parent",
field=models.ForeignKey(
blank=True,
help_text="Optional parent filter to create one-level hierarchy (child filters are AND-merged with parent)",
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="children",
to="data_manager.filter",
db_index=False,
),
),
migrations.AlterField(
model_name="filter",
name="index",
field=models.IntegerField(
blank=True,
default=None,
help_text="Display order among root filters only",
null=True,
verbose_name="index",
),
),
]
22 changes: 21 additions & 1 deletion label_studio/data_manager/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,12 @@ def get_prepare_tasks_params(self, add_selected_items=False):
for f in self.filter_group.filters.all():
items.append(
dict(
id=f.id,
filter=f.column,
operator=f.operator,
type=f.type,
value=f.value,
parent_id=f.parent.id if f.parent else None,
)
)
filters = dict(conjunction=self.filter_group.conjunction, items=items)
Expand All @@ -86,7 +88,25 @@ class FilterGroup(models.Model):


class Filter(models.Model):
index = models.IntegerField(_('index'), default=0, help_text='To keep filter order')
# Optional reference to a parent filter. We only allow **one** level of nesting.
parent = models.ForeignKey(
'self',
on_delete=models.CASCADE,
related_name='children',
null=True,
blank=True,
db_index=False,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this to satisfy the linter? While it's strictly necessary, I always like to have indices on FKs to support flexible querying

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep - I tried adding an index in a concurrent migration, but wasn't able to do it in a way that passed the migration linter

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's fine to suppress the migration linter like so:

- the index operation will be fast because this is a new column where all the new values will be NULL

help_text='Optional parent filter to create one-level hierarchy (child filters are AND-merged with parent)',
)

# `index` is now only meaningful for **root** filters (parent is NULL)
index = models.IntegerField(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a way we could name this field to make it clearer what it really represents? display_index or display_order_index would both help in this regard, imo

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This field isn't new, renaming it would have increased change surface area, but yes it's possible. Could do it in a separate chore PR

_('index'),
null=True,
blank=True,
default=None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit - as long as we are making other changes, we can remove this

Suggested change
default=None,

help_text='Display order among root filters only',
)
column = models.CharField(_('column'), max_length=1024, help_text='Field name')
type = models.CharField(_('type'), max_length=1024, help_text='Field type')
operator = models.CharField(_('operator'), max_length=1024, help_text='Filter operator')
Expand Down
3 changes: 3 additions & 0 deletions label_studio/data_manager/prepare_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ class FilterIn(BaseModel):


class Filter(BaseModel):
id: Optional[int] = None # Database ID of this filter (present in API responses)
parent_id: Optional[int] = None # Parent filter DB ID (only for child filters)

filter: str
operator: str
type: str
Expand Down
111 changes: 90 additions & 21 deletions label_studio/data_manager/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,41 @@ def to_internal_value(self, data):
if 'filter_group' not in data and conjunction:
data['filter_group'] = {'conjunction': conjunction, 'filters': []}
if 'items' in filters:
# Support two input formats:
# 1) "flat" list where potential children reference their parent via ``parent_index`` (legacy)
# 2) "nested" list where each root item may contain ``child_filters`` (new preferred format).

root_counter = 0 # keeps track of the current root filter index

def _append_filter(src_filter, parent_idx=None):
"""Convert a single filter JSON object into internal representation."""

filter_payload = {
'column': src_filter.get('filter', ''),
'operator': src_filter.get('operator', ''),
'type': src_filter.get('type', ''),
'value': src_filter.get('value', {}),
}

# Explicit parent reference by DB id (rare in create requests)
if src_filter.get('parent') is not None:
filter_payload['parent'] = src_filter.get('parent')

data['filter_group']['filters'].append(filter_payload)

# Iterate over top-level items (roots)
for f in filters['items']:
data['filter_group']['filters'].append(
{
'column': f.get('filter', ''),
'operator': f.get('operator', ''),
'type': f.get('type', ''),
'value': f.get('value', {}),
}
)
# Root filter itself
_append_filter(f)

# Process potential nested children
child_filters = f.get('child_filters', []) or []
for child in child_filters:
_append_filter(child, parent_idx=root_counter)

# Increment root index AFTER children have been processed so that
# subsequent roots receive the proper index.
root_counter += 1

ordering = _data.pop('ordering', {})
data['ordering'] = ordering
Expand All @@ -137,15 +163,28 @@ def to_representation(self, instance):
filters.pop('filters', [])
filters.pop('id', None)

for f in instance.filter_group.filters.order_by('index'):
filters['items'].append(
{
'filter': f.column,
'operator': f.operator,
'type': f.type,
'value': f.value,
}
)
# Root filters first (ordered by index), followed by child filters (any order)
roots = instance.filter_group.filters.filter(parent__isnull=True).order_by('index')
children = instance.filter_group.filters.filter(parent__isnull=False)

for f in list(roots) + list(children):
item = {
'id': f.id,
'filter': f.column,
'operator': f.operator,
'type': f.type,
'value': f.value,
}

# Relationship hints
if f.parent_id:
item['parent_id'] = f.parent_id
else:
child_ids = list(f.children.all().values_list('id', flat=True))
if child_ids:
item['child_ids'] = child_ids

filters['items'].append(item)
result['data']['filters'] = filters

selected_items = result.pop('selected_items', {})
Expand All @@ -159,11 +198,41 @@ def to_representation(self, instance):

@staticmethod
def _create_filters(filter_group, filters_data):
filter_index = 0
"""Create Filter objects inside the provided ``filter_group``.

* For **root** filters (``parent`` is ``None``) we enumerate the
``index`` so that the UI can preserve left-to-right order.
* For **child** filters we leave ``index`` as ``None`` – they are not
shown in the top-level ordering bar.
"""

# Pass 1 — create root filters (no parent) and build index→object map
index_to_filter = {}
next_index = 0
for filter_data in filters_data:
is_root = filter_data.get('parent') in (None, '')

if not is_root:
continue

# Enumerate root filters for stable ordering
filter_data['index'] = next_index
next_index += 1

root_obj = Filter.objects.create(**filter_data)
filter_group.filters.add(root_obj)
index_to_filter[root_obj.index] = root_obj

# Pass 2 — create child filters, resolving parent via `parent` PK or `parent_index`
for filter_data in filters_data:
filter_data['index'] = filter_index
filter_group.filters.add(Filter.objects.create(**filter_data))
filter_index += 1
has_parent_pk = filter_data.get('parent') not in (None, '')

if not has_parent_pk:
# Already processed as a root filter
continue

child_obj = Filter.objects.create(**filter_data)
filter_group.filters.add(child_obj)

def create(self, validated_data):
with transaction.atomic():
Expand Down
10 changes: 8 additions & 2 deletions label_studio/tests/data_manager/test_views_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,10 @@ def test_views_api_filters(business_client, project_id):
)

assert response.status_code == 200, response.content
assert response.json()['data'] == payload['data']
response_data = response.json()['data']
for item in response_data['filters']['items']:
item.pop('id')
assert response_data == payload['data']

updated_payload = dict(
project=project_id,
Expand Down Expand Up @@ -187,7 +190,10 @@ def test_views_api_filters(business_client, project_id):
)

assert response.status_code == 200, response.content
assert response.json()['data'] == updated_payload['data']
response_data = response.json()['data']
for item in response_data['filters']['items']:
item.pop('id')
assert response_data == updated_payload['data']


def test_views_ordered_by_id(business_client, project_id):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,15 @@ const GroupWrapper = ({ children, wrap = false }) => {
};

export const FilterLine = observer(({ filter, availableFilters, index, view, sidebar, dropdownClassName }) => {
// Determine this filter's position in the master filters array; child filters store
// a reference to their parent via this absolute index, not the index within the
// (potentially filtered) list rendered in the sidebar. Using the absolute index
// guarantees that child filters are rendered inline with their correct parent
// even when the sidebar list omits them.
const parentIdx = filter.view.filters.indexOf(filter);

const childFilters = filter.view.filters.filter((f) => f.parent_index === parentIdx);

return (
<Block name="filter-line" tag={Fragment}>
<GroupWrapper wrap={sidebar}>
Expand Down Expand Up @@ -72,6 +81,24 @@ export const FilterLine = observer(({ filter, availableFilters, index, view, sid
</GroupWrapper>
<GroupWrapper wrap={sidebar}>
<FilterOperation filter={filter} value={filter.currentValue} operator={filter.operator} field={filter.field} />

{/* Render child filters (join filters) inline */}
{childFilters.map((child, idx) => {
console.debug("[DM] render child filter", { parent: filter, child });
return (
<Elem name="column" key={`child-${idx}`} mix="value">
<Tag size="small" className="filters-data-tag" color="#1d91e4" style={{ marginRight: 4 }}>
{child.field.title}
</Tag>
<FilterOperation
filter={child}
value={child.currentValue}
operator={child.operator}
field={child.field}
/>
</Elem>
);
})}
</GroupWrapper>
<Elem name="remove">
<Button
Expand Down
Loading
Loading