first version check with dict

cghielmini · cghielmini · commit 88a6466465a7 · 2025-10-31T12:29:12.000+01:00
diff --git a/engine/check.py b/engine/check.py
@@ -43,13 +43,8 @@
     default="",
     help=cli_help["fof_types"],
 )
-def check(
-    reference_files,
-    current_files,
-    tolerance_files,
-    factor,
-    fof_types,
-):
+@click.option("--rules", default="")
+def check(reference_files, current_files, tolerance_files, factor, fof_types, rules):
 
     zipped = zip(reference_files, current_files, tolerance_files)
 
@@ -60,7 +55,7 @@ def check(
     for reference_file, current_file, tolerance_file in expanded_zip:
 
         out, err, tol = check_file_with_tolerances(
-            tolerance_file, reference_file, current_file, factor
+            tolerance_file, reference_file, current_file, factor, rules
         )
 
         if out:
diff --git a/util/dataframe_ops.py b/util/dataframe_ops.py
@@ -5,6 +5,7 @@
 reference datasets with specified tolerances.
 """
 
+import ast
 import sys
 import warnings
 
@@ -297,7 +298,7 @@ def parse_check(tolerance_file_name, input_file_ref, input_file_cur, factor):
 
 
 def check_file_with_tolerances(
-    tolerance_file_name, input_file_ref, input_file_cur, factor
+    tolerance_file_name, input_file_ref, input_file_cur, factor, rules
 ):
     file_type = get_file_type(input_file_ref)
 
@@ -315,20 +316,27 @@ def check_file_with_tolerances(
         df_ref = parse_probtest_fof(input_file_ref)
 
         df_cur = parse_probtest_fof(input_file_cur)
-        cols = ["check", "state", "r_state", "r_check"]
-        existing_cols = [c for c in cols if c in df_ref and c in df_cur]
 
-        if existing_cols:
-            ds1_multiple = df_ref[existing_cols]
-            ds2_multiple = df_cur[existing_cols]
+        errors = multiple_solutions_from_dict(df_ref, df_cur, rules)
 
-            out, diff = check_multiple_solutions(
-                ds1_multiple, ds2_multiple, existing_cols
-            )
+        if errors:
+            logger.error("RESULT: check FAILED")
+            sys.exit(1)
+
+        # cols = ["check", "state", "r_state", "r_check"]
+        # existing_cols = [c for c in cols if c in df_ref and c in df_cur]
+
+        # if existing_cols:
+        #     ds1_multiple = df_ref[existing_cols]
+        #     ds2_multiple = df_cur[existing_cols]
+
+        #     out, diff = check_multiple_solutions(
+        #         ds1_multiple, ds2_multiple, existing_cols
+        #     )
 
-            if out == 1:
-                logger.error("RESULT: check FAILED. Errors at the lines %s", diff)
-                sys.exit(1)
+        #     if out == 1:
+        #         logger.error("RESULT: check FAILED. Errors at the lines %s", diff)
+        #         sys.exit(1)
 
     else:
         df_tol, df_ref, df_cur = parse_check(
@@ -383,61 +391,109 @@ def has_enough_data(dfs):
 }
 
 
-def check_multiple_solutions(ds1, ds2, existing_cols):
-
-    allowed_checks = [13, 18, 32]
-    allowed_states = [1, 5, 7, 9]
-
-    df1 = ds1[existing_cols]
-    df2 = ds2[existing_cols]
-
-    diff = []
-
-    check_cols = ["check", "r_check"]
-    state_cols = ["state", "r_state"]
-    out = 0
-
-    for idx in df1.index:
-        check_col = next((c for c in check_cols if c in df1.columns), None)
-        state_col = next((c for c in state_cols if c in df1.columns), None)
-
-        if check_col is None or state_col is None:
-            raise KeyError("'Check' or “state” columns not found in datasets.")
-
-        check_ref = df1.at[idx, check_col]
-        check_cur = df2.at[idx, check_col]
-        state_ref = df1.at[idx, state_col]
-        state_cur = df2.at[idx, state_col]
-
-        # CASE 1: check does not change
-        if check_ref == check_cur:
-            # If check is not an accepted, state should not change
-            if check_ref not in allowed_checks:
-                if state_ref != state_cur:
-                    out = 1
-                    diff.append(idx)
-
-            # If is an admitted change, state can change, but only in the admitted cases
-            else:
-                if state_ref != state_cur:
-                    if (state_ref not in allowed_states) or (
-                        state_cur not in allowed_states
-                    ):
-                        out = 1
-                        diff.append(idx)
-
-        # CASE 2: check changes
-        else:
-            if (check_ref not in allowed_checks) and (check_cur not in allowed_checks):
-                out = 1
-                diff.append(idx)
-
-            # If check values are both admitted, also state should
-            # be in the admitted values
-            elif (state_ref not in allowed_states) or (state_cur not in allowed_states):
-                out = 1
-                diff.append(idx)
-
-    diff = np.array(diff)
-
-    return out, diff
+def multiple_solutions_from_dict(df_ref, df_cur, rules):
+    """Compare rule columns of two dataframes row by row (by position).
+
+    ``rules`` is a dict, or the string literal of one, mapping a column name
+    to the values admitted for it (None or a blank string: no rules). Only
+    rule columns present in both dataframes are compared; two differing
+    values are accepted only when both are admitted for their column.
+
+    Returns a list of error dicts (keys "row", "column", "file1", "file2",
+    "error"), empty when nothing is wrong; every error is also logged. A
+    row-count mismatch is reported with "row" and "column" set to None.
+
+    Raises ValueError when ``rules`` evaluates to something other than a dict.
+    """
+    # The CLI default is "", which ast.literal_eval rejects with SyntaxError.
+    if rules is None or (isinstance(rules, str) and not rules.strip()):
+        return []
+    rules_dict = ast.literal_eval(rules) if isinstance(rules, str) else rules
+    if not isinstance(rules_dict, dict):
+        raise ValueError("rules must be a dict or the literal of a dict")
+
+    cols = [c for c in rules_dict if c in df_ref.columns and c in df_cur.columns]
+    if not cols:
+        return []
+
+    errors = []
+    n_ref, n_cur = len(df_ref), len(df_cur)
+    if n_ref != n_cur:
+        # Fail cleanly instead of raising IndexError or ignoring extra rows.
+        errors.append((None, None, n_ref, n_cur, "different number of rows"))
+
+    for i in range(min(n_ref, n_cur)):
+        row1, row2 = df_ref.iloc[i], df_cur.iloc[i]
+        for col in cols:
+            val1, val2 = row1[col], row2[col]
+            admitted = rules_dict[col]
+            if val1 != val2 and not (val1 in admitted and val2 in admitted):
+                errors.append((i, col, val1, val2, "values different and not admitted"))
+
+    if errors:
+        logger.error("Errors found while comparing the files:")
+    for err in errors:
+        logger.error("Row %s - Column '%s': file1=%s, file2=%s → %s", *err)
+
+    keys = ("row", "column", "file1", "file2", "error")
+    return [dict(zip(keys, err)) for err in errors]
+
+
+# def check_multiple_solutions(ds1, ds2, existing_cols):
+
+#     allowed_checks = [13, 18, 32]
+#     allowed_states = [1, 5, 7, 9]
+
+#     df1 = ds1[existing_cols]
+#     df2 = ds2[existing_cols]
+
+#     diff = []
+
+#     check_cols = ["check", "r_check"]
+#     state_cols = ["state", "r_state"]
+#     out = 0
+
+#     for idx in df1.index:
+#         check_col = next((c for c in check_cols if c in df1.columns), None)
+#         state_col = next((c for c in state_cols if c in df1.columns), None)
+
+#         if check_col is None or state_col is None:
+#             raise KeyError("'Check' or “state” columns not found in datasets.")
+
+#         check_ref = df1.at[idx, check_col]
+#         check_cur = df2.at[idx, check_col]
+#         state_ref = df1.at[idx, state_col]
+#         state_cur = df2.at[idx, state_col]
+
+#         # CASE 1: check does not change
+#         if check_ref == check_cur:
+#             # If check is not an accepted, state should not change
+#             if check_ref not in allowed_checks:
+#                 if state_ref != state_cur:
+#                     out = 1
+#                     diff.append(idx)
+
+#             # If is an admitted change, state can change, but only in the admitted cases
+#             else:
+#                 if state_ref != state_cur:
+#                     if (state_ref not in allowed_states) or (
+#                         state_cur not in allowed_states
+#                     ):
+#                         out = 1
+#                         diff.append(idx)
+
+#         # CASE 2: check changes
+#         else:
+#             if (check_ref not in allowed_checks) and (check_cur not in allowed_checks):
+#                 out = 1
+#                 diff.append(idx)
+
+#             # If check values are both admitted, also state should
+#             # be in the admitted values
+#             elif (state_ref not in allowed_states) or (state_cur not in allowed_states):
+#                 out = 1
+#                 diff.append(idx)
+
+#     diff = np.array(diff)
+
+#     return out, diff