def bulk_update_is_labeled_by_overlap(tasks_ids, project):
    """Bulk update ``is_labeled`` for tasks using the overlap-based calculation only.

    A task is marked labeled when its completed annotations reach its ``overlap``.
    Completed annotations are ``total_annotations``, plus ``cancelled_annotations``
    when the project's skip queue is ``IGNORE_SKIPPED``. IDs are processed in
    batches of ``settings.BATCH_SIZE``, with a single UPDATE statement per batch.

    :param tasks_ids: Iterable of task IDs (list, tuple, set, generator or QuerySet of IDs)
    :param project: Project instance; only tasks belonging to it are updated
    :return: None
    :raises ValueError: If project cannot use overlap
    """
    from django.db.models import BooleanField, Case, F, Q, Value, When
    from django.db.models.lookups import GreaterThanOrEqual

    if not tasks_ids:
        return

    # Materialize any iterable (not only QuerySets) so that len() and slicing
    # below are always valid; re-check because a generator is truthy even
    # when it yields nothing.
    tasks_ids = list(tasks_ids)
    if not tasks_ids:
        return

    if not project._can_use_overlap():
        raise ValueError('Project cannot use overlap-based calculation')

    # Cancelled annotations count as completed when the project is in
    # IGNORE_SKIPPED mode.
    completed_annotations_f_expr = F('total_annotations')
    if project.skip_queue == project.SkipQueue.IGNORE_SKIPPED:
        completed_annotations_f_expr += F('cancelled_annotations')
    finished_q = Q(GreaterThanOrEqual(completed_annotations_f_expr, F('overlap')))

    # One conditional expression sets both outcomes in the same statement, so
    # a task cannot change state between a separate "set True" and "set False"
    # pass. Rows where the comparison is NULL fall through to the default False.
    is_labeled_expr = Case(
        When(finished_q, then=Value(True)),
        default=Value(False),
        output_field=BooleanField(),
    )

    # Process in batches to keep the IN (...) list bounded
    batch_size = settings.BATCH_SIZE
    for start in range(0, len(tasks_ids), batch_size):
        batch_ids = tasks_ids[start : start + batch_size]
        Task.objects.filter(id__in=batch_ids, project=project).update(is_labeled=is_labeled_expr)
def bulk_update_stats_project_tasks(tasks, project=None):
    """Bulk-refresh task stats for a set of tasks, e.g. after project settings change.

    Behaviour is selected by the feature flag
    ``fflag_fix_back_plt_802_update_is_labeled_20062025_short``:

    * flag set: task IDs are collected and passed, together with the project,
      to the function named by ``settings.BULK_UPDATE_IS_LABELED``;
    * flag not set: the call is delegated unchanged to
      ``deprecated_bulk_update_stats_project_tasks``.

    :param tasks: QuerySet or list of Task objects
    :param project: Project instance (optional, will be inferred from first task if not provided)
    :return: None on the flag path; otherwise whatever the deprecated implementation returns
    """
    from core.feature_flags import flag_set

    if not flag_set('fflag_fix_back_plt_802_update_is_labeled_20062025_short', user='auto'):
        # Legacy path: nothing from the new implementation is loaded here, so
        # a bad BULK_UPDATE_IS_LABELED setting cannot break it.
        return deprecated_bulk_update_stats_project_tasks(tasks, project)

    from projects.functions.utils import get_unique_ids_list

    # Convert tasks to list of IDs
    task_ids = get_unique_ids_list(tasks)
    if not task_ids:
        return None

    if project is None:
        # Infer the project from the first task; select_related fetches the
        # task and its project in one query instead of two.
        project = Task.objects.select_related('project').get(id=task_ids[0]).project

    # Resolve the configurable implementation only when it is actually used
    bulk_update_is_labeled = load_func(settings.BULK_UPDATE_IS_LABELED)
    bulk_update_is_labeled(task_ids, project)
    return None


Q_finished_annotations = Q(was_cancelled=False) & Q(result__isnull=False)
Q_task_finished_annotations = Q(annotations__was_cancelled=False) & Q(annotations__result__isnull=False)