Skip to content

Commit 4dd7e29

Browse files
committed
INTEGRITY: Merge one entry from each group of duplicate entries and drop the others.
1 parent a303645 commit 4dd7e29

File tree

1 file changed

+37
-13
lines changed

1 file changed

+37
-13
lines changed

db_functions.py

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1693,7 +1693,7 @@ def set_process(
16931693
console_message = "Looking for duplicates..."
16941694
console_log(console_message)
16951695

1696-
# Remove all such filesets, which have many to one mapping with a single candidate, those are extra variants.
1696+
# For set filesets that have a many-to-one mapping onto a single candidate, merge only one of them and drop the rest.
16971697
value_to_keys = defaultdict(list)
16981698
for set_fileset, candidates in set_to_candidate_dict.items():
16991699
if len(candidates) == 1:
@@ -1717,7 +1717,12 @@ def set_process(
17171717
platform = result["platform"]
17181718
language = result["language"]
17191719

1720+
# Skip the first entry, let it merge and drop others
1721+
skip = True
17201722
for set_fileset in set_filesets:
1723+
if skip:
1724+
skip = False
1725+
continue
17211726
fileset = id_to_fileset_dict[set_fileset]
17221727
category_text = "Drop fileset - Duplicates"
17231728
fileset_name = fileset["name"] if "name" in fileset else ""
@@ -1742,9 +1747,9 @@ def set_process(
17421747
fileset = id_to_fileset_dict[fileset_id]
17431748

17441749
# Filter by platform to reduce manual merge
1745-
candidate_filesets = set_filter_by_platform(
1746-
fileset["name"], candidate_filesets, conn
1747-
)
1750+
# candidate_filesets = set_filter_by_platform(
1751+
# fileset["name"], candidate_filesets, conn
1752+
# )
17481753

17491754
(
17501755
fully_matched_filesets,
@@ -1771,16 +1776,35 @@ def set_process(
17711776
match_count += 1
17721777
console_log("Matching performed.")
17731778

1774-
for fileset_id, candidates in manual_merge_map.items():
1775-
category_text = "Manual Merge Required"
1776-
log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidates)}."
1777-
manual_merged_filesets += 1
1778-
add_manual_merge(
1779-
candidates, fileset_id, category_text, log_text, user, conn, log_text
1780-
)
1781-
1782-
# Final log
17831779
with conn.cursor() as cursor:
1780+
for fileset_id, candidates in manual_merge_map.items():
1781+
if len(candidates) == 0:
1782+
category_text = "Drop fileset - No Candidates"
1783+
fileset = id_to_fileset_dict[fileset_id]
1784+
fileset_name = fileset["name"] if "name" in fileset else ""
1785+
fileset_description = (
1786+
fileset["description"] if "description" in fileset else ""
1787+
)
1788+
log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}."
1789+
create_log(
1790+
escape_string(category_text), user, escape_string(log_text), conn
1791+
)
1792+
dropped_early_no_candidate += 1
1793+
delete_original_fileset(fileset_id, conn)
1794+
else:
1795+
category_text = "Manual Merge Required"
1796+
log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidates)}."
1797+
manual_merged_filesets += 1
1798+
add_manual_merge(
1799+
candidates,
1800+
fileset_id,
1801+
category_text,
1802+
log_text,
1803+
user,
1804+
conn,
1805+
log_text,
1806+
)
1807+
17841808
cursor.execute(
17851809
"SELECT COUNT(fileset) from transactions WHERE `transaction` = %s",
17861810
(transaction_id,),

0 commit comments

Comments
 (0)