Skip to content

feat: ROOT-45: Filter by annotator prototype #7783

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 25 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions label_studio/data_manager/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
"""
import logging
import re
from collections import defaultdict
from datetime import datetime
from functools import reduce
from typing import ClassVar

import ujson as json
Expand Down Expand Up @@ -261,11 +263,15 @@ def apply_filters(queryset, filters, project, request):
if not filters:
return queryset

index_to_filter_expressions: dict[int, list[Q]] = defaultdict(list)

# convert conjunction to orm statement
filter_expressions = []
custom_filter_expressions = load_func(settings.DATA_MANAGER_CUSTOM_FILTER_EXPRESSIONS)

for _filter in filters.items:
for index, _filter in enumerate(filters.items):
# combine child filters with their parent in the same filter expression
index = _filter.parent_index if _filter.parent_index is not None else index
filter_expressions = index_to_filter_expressions[index]

# we can also have annotations filters
if not _filter.filter.startswith('filter:tasks:') or _filter.value is None:
Expand Down Expand Up @@ -458,11 +464,13 @@ def apply_filters(queryset, filters, project, request):
"""
if filters.conjunction == ConjunctionEnum.OR:
result_filter = Q()
for filter_expression in filter_expressions:
for filter_expressions in index_to_filter_expressions.values():
filter_expression = reduce(lambda x, y: x & y, filter_expressions)
result_filter.add(filter_expression, Q.OR)
queryset = queryset.filter(result_filter)
else:
for filter_expression in filter_expressions:
for filter_expressions in index_to_filter_expressions.values():
filter_expression = reduce(lambda x, y: x & y, filter_expressions)
queryset = queryset.filter(filter_expression)
return queryset

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 5.1.10 on 2025-06-17 18:27

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("data_manager", "0012_alter_view_user"),
]

operations = [
migrations.AddField(
model_name="filter",
name="parent",
field=models.ForeignKey(
blank=True,
help_text="Optional parent filter to create one-level hierarchy (child filters are AND-merged with parent)",
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="children",
to="data_manager.filter",
db_index=False,
),
),
migrations.AlterField(
model_name="filter",
name="index",
field=models.IntegerField(
blank=True,
default=None,
help_text="Display order among root filters only",
null=True,
verbose_name="index",
),
),
]
20 changes: 19 additions & 1 deletion label_studio/data_manager/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,25 @@ class FilterGroup(models.Model):


class Filter(models.Model):
index = models.IntegerField(_('index'), default=0, help_text='To keep filter order')
# Optional reference to a parent filter. We only allow **one** level of nesting.
parent = models.ForeignKey(
'self',
on_delete=models.CASCADE,
related_name='children',
null=True,
blank=True,
db_index=False,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this to satisfy the linter? While it's strictly necessary, I always like to have indices on FKs to support flexible querying

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep - I tried adding an index in a concurrent migration, but wasn't able to do it in a way that passed the migration linter

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's fine to suppress the migration linter like so:

- the index operation will be fast because this is a new column where all the new values will be NULL

help_text='Optional parent filter to create one-level hierarchy (child filters are AND-merged with parent)',
)

# `index` is now only meaningful for **root** filters (parent is NULL)
index = models.IntegerField(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a way we could name this field to make it clearer what it really represents? display_index or display_order_index would both help in this regard, imo

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This field isn't new, renaming it would have increased change surface area, but yes it's possible. Could do it in a separate chore PR

_('index'),
null=True,
blank=True,
default=None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit - as long as we are making other changes, we can remove this

Suggested change
default=None,

help_text='Display order among root filters only',
)
column = models.CharField(_('column'), max_length=1024, help_text='Field name')
type = models.CharField(_('type'), max_length=1024, help_text='Field type')
operator = models.CharField(_('operator'), max_length=1024, help_text='Filter operator')
Expand Down
1 change: 1 addition & 0 deletions label_studio/data_manager/prepare_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class Filter(BaseModel):
operator: str
type: str
value: Union[StrictInt, StrictFloat, StrictBool, StrictStr, FilterIn, list]
parent_index: Optional[int] = None


class ConjunctionEnum(Enum):
Expand Down
90 changes: 77 additions & 13 deletions label_studio/data_manager/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@


class FilterSerializer(serializers.ModelSerializer):
# Allow children filters to reference parent by root-filter index instead of DB id
parent_index = serializers.IntegerField(
required=False,
allow_null=True,
write_only=True,
help_text='Index of the parent filter inside the same FilterGroup (alternative to `parent` PK)',
)

class Meta:
model = Filter
fields = '__all__'
Expand Down Expand Up @@ -121,6 +129,8 @@
'operator': f.get('operator', ''),
'type': f.get('type', ''),
'value': f.get('value', {}),
'parent': f.get('parent'),
'parent_index': f.get('parent_index'),
}
)

Expand All @@ -137,15 +147,19 @@
filters.pop('filters', [])
filters.pop('id', None)

for f in instance.filter_group.filters.order_by('index'):
filters['items'].append(
{
'filter': f.column,
'operator': f.operator,
'type': f.type,
'value': f.value,
}
)
# Root filters first (ordered by index), followed by child filters (any order)
roots = instance.filter_group.filters.filter(parent__isnull=True).order_by('index')
children = instance.filter_group.filters.filter(parent__isnull=False)
for f in list(roots) + list(children):
item = {
'filter': f.column,
'operator': f.operator,
'type': f.type,
'value': f.value,
}
if f.parent_id:
item['parent'] = f.parent_id

Check warning on line 161 in label_studio/data_manager/serializers.py

View check run for this annotation

Codecov / codecov/patch

label_studio/data_manager/serializers.py#L161

Added line #L161 was not covered by tests
filters['items'].append(item)
result['data']['filters'] = filters

selected_items = result.pop('selected_items', {})
Expand All @@ -159,11 +173,61 @@

@staticmethod
def _create_filters(filter_group, filters_data):
filter_index = 0
"""Create Filter objects inside the provided ``filter_group``.

* For **root** filters (``parent`` is ``None``) we enumerate the
``index`` so that the UI can preserve left-to-right order.
* For **child** filters we leave ``index`` as ``None`` – they are not
shown in the top-level ordering bar.
"""

# Pass 1 — create root filters (no parent/parent_index) and build index→object map
index_to_filter = {}
next_index = 0
for filter_data in filters_data:
is_root = filter_data.get('parent') in (None, '') and filter_data.get('parent_index') in (None, '')

if not is_root:
continue

Check warning on line 191 in label_studio/data_manager/serializers.py

View check run for this annotation

Codecov / codecov/patch

label_studio/data_manager/serializers.py#L191

Added line #L191 was not covered by tests

# Enumerate root filters for stable ordering
filter_data['index'] = next_index
next_index += 1

# Remove helper key before model instantiation
filter_data.pop('parent_index', None)

root_obj = Filter.objects.create(**filter_data)
filter_group.filters.add(root_obj)
index_to_filter[root_obj.index] = root_obj

# Pass 2 — create child filters, resolving parent via `parent` PK or `parent_index`
for filter_data in filters_data:
filter_data['index'] = filter_index
filter_group.filters.add(Filter.objects.create(**filter_data))
filter_index += 1
has_parent_pk = filter_data.get('parent') not in (None, '')
has_parent_index = filter_data.get('parent_index') not in (None, '')

if not (has_parent_pk or has_parent_index):
# Already processed as a root filter
continue

fixed_data = filter_data.copy()

Check warning on line 213 in label_studio/data_manager/serializers.py

View check run for this annotation

Codecov / codecov/patch

label_studio/data_manager/serializers.py#L213

Added line #L213 was not covered by tests

if has_parent_pk:
parent_obj = filter_group.filters.get(id=fixed_data['parent'])

Check warning on line 216 in label_studio/data_manager/serializers.py

View check run for this annotation

Codecov / codecov/patch

label_studio/data_manager/serializers.py#L215-L216

Added lines #L215 - L216 were not covered by tests
else:
parent_idx = fixed_data.get('parent_index')
parent_obj = index_to_filter.get(parent_idx)
if parent_obj is None:
raise serializers.ValidationError(

Check warning on line 221 in label_studio/data_manager/serializers.py

View check run for this annotation

Codecov / codecov/patch

label_studio/data_manager/serializers.py#L218-L221

Added lines #L218 - L221 were not covered by tests
f'parent_index={parent_idx} does not correspond to any root filter',
)

fixed_data['parent'] = parent_obj
fixed_data['index'] = None # Child filters do not participate in top-level ordering
fixed_data.pop('parent_index', None)

Check warning on line 227 in label_studio/data_manager/serializers.py

View check run for this annotation

Codecov / codecov/patch

label_studio/data_manager/serializers.py#L225-L227

Added lines #L225 - L227 were not covered by tests

child_obj = Filter.objects.create(**fixed_data)
filter_group.filters.add(child_obj)

Check warning on line 230 in label_studio/data_manager/serializers.py

View check run for this annotation

Codecov / codecov/patch

label_studio/data_manager/serializers.py#L229-L230

Added lines #L229 - L230 were not covered by tests

def create(self, validated_data):
with transaction.atomic():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,15 @@ const GroupWrapper = ({ children, wrap = false }) => {
};

export const FilterLine = observer(({ filter, availableFilters, index, view, sidebar, dropdownClassName }) => {
// Determine this filter's position in the master filters array; child filters store
// a reference to their parent via this absolute index, not the index within the
// (potentially filtered) list rendered in the sidebar. Using the absolute index
// guarantees that child filters are rendered inline with their correct parent
// even when the sidebar list omits them.
const parentIdx = filter.view.filters.indexOf(filter);

const childFilters = filter.view.filters.filter((f) => f.parent_index === parentIdx);

return (
<Block name="filter-line" tag={Fragment}>
<GroupWrapper wrap={sidebar}>
Expand Down Expand Up @@ -72,6 +81,24 @@ export const FilterLine = observer(({ filter, availableFilters, index, view, sid
</GroupWrapper>
<GroupWrapper wrap={sidebar}>
<FilterOperation filter={filter} value={filter.currentValue} operator={filter.operator} field={filter.field} />

{/* Render child filters (join filters) inline */}
{childFilters.map((child, idx) => {
console.debug("[DM] render child filter", { parent: filter, child });
return (
<Elem name="column" key={`child-${idx}`} mix="value">
<Tag size="small" className="filters-data-tag" color="#1d91e4" style={{ marginRight: 4 }}>
{child.field.title}
</Tag>
<FilterOperation
filter={child}
value={child.currentValue}
operator={child.operator}
field={child.field}
/>
</Elem>
);
})}
</GroupWrapper>
<Elem name="remove">
<Button
Expand Down
Loading
Loading