From 10c2f6e360d60730ab6595ecab2947c0c5aaf2ee Mon Sep 17 00:00:00 2001 From: Nina Xu Date: Fri, 23 May 2025 19:02:35 +0000 Subject: [PATCH] change default rope to auto for v2 configs --- .../gretel/tasks/tabular_ft__default.yaml | 11 ++++++----- .../tasks/tabular_ft__differential_privacy.yaml | 11 ++++++----- config_templates/gretel/workflows/tabular-ft-dp.yml | 11 ++++++----- config_templates/gretel/workflows/tabular-ft.yml | 11 ++++++----- 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/config_templates/gretel/tasks/tabular_ft__default.yaml b/config_templates/gretel/tasks/tabular_ft__default.yaml index 2b25d20e..eae866a3 100644 --- a/config_templates/gretel/tasks/tabular_ft__default.yaml +++ b/config_templates/gretel/tasks/tabular_ft__default.yaml @@ -30,11 +30,12 @@ task: # Scale the base LLM's context length by this factor # using RoPE scaling to handle datasets with more # columns, or datasets containing groups with more - # than a few records. You can try increasing the - # rope_scaling_factor (you could first try the value 2) - # if you hit an error for maximum tokens. It must be - # an integer value. The default is 1 and maximum is 6. - rope_scaling_factor: 1 + # than a few records. If set to 'auto', we will + # estimate a value that's enough to cover your + # dataset. Try increasing this value if you hit an + # error for maximum tokens. It must be 'auto' or + # an integer value between 1 and 6.
+ rope_scaling_factor: auto generate: num_records: 1000 diff --git a/config_templates/gretel/tasks/tabular_ft__differential_privacy.yaml b/config_templates/gretel/tasks/tabular_ft__differential_privacy.yaml index fbc15fd2..01044be6 100644 --- a/config_templates/gretel/tasks/tabular_ft__differential_privacy.yaml +++ b/config_templates/gretel/tasks/tabular_ft__differential_privacy.yaml @@ -38,11 +38,12 @@ task: # Scale the base LLM's context length by this factor # using RoPE scaling to handle datasets with more # columns, or datasets containing groups with more - # than a few records. You can try increasing the - # rope_scaling_factor (you could first try the value 2) - # if you hit an error for maximum tokens. It must be - # an integer value. The default is 1 and maximum is 6. - rope_scaling_factor: 1 + # than a few records. If set to 'auto', we will + # estimate a value that's enough to cover your + # dataset. Try increasing this value if you hit an + # error for maximum tokens. It must be 'auto' or + # an integer value between 1 and 6. + rope_scaling_factor: auto # You can try increasing this until you run out-of-memory. batch_size: 4 diff --git a/config_templates/gretel/workflows/tabular-ft-dp.yml b/config_templates/gretel/workflows/tabular-ft-dp.yml index 9672ea67..fc197491 100644 --- a/config_templates/gretel/workflows/tabular-ft-dp.yml +++ b/config_templates/gretel/workflows/tabular-ft-dp.yml @@ -46,11 +46,12 @@ steps: # Scale the base LLM's context length by this factor # using RoPE scaling to handle datasets with more # columns, or datasets containing groups with more - # than a few records. You can try increasing the - # rope_scaling_factor (you could first try the value 2) - # if you hit an error for maximum tokens. It must be - # an integer value. The default is 1 and maximum is 6. - rope_scaling_factor: 1 + # than a few records. If set to 'auto', we will + # estimate a value that's enough to cover your + # dataset.
Try increasing this value if you hit an + # error for maximum tokens. It must be 'auto' or + # an integer value between 1 and 6. + rope_scaling_factor: auto # You can try increasing this until you run out-of-memory. batch_size: 4 diff --git a/config_templates/gretel/workflows/tabular-ft.yml b/config_templates/gretel/workflows/tabular-ft.yml index 05d4a008..3839281a 100644 --- a/config_templates/gretel/workflows/tabular-ft.yml +++ b/config_templates/gretel/workflows/tabular-ft.yml @@ -38,11 +38,12 @@ steps: # Scale the base LLM's context length by this factor # using RoPE scaling to handle datasets with more # columns, or datasets containing groups with more - # than a few records. You can try increasing the - # rope_scaling_factor (you could first try the value 2) - # if you hit an error for maximum tokens. It must be - # an integer value. The default is 1 and maximum is 6. - rope_scaling_factor: 1 + # than a few records. If set to 'auto', we will + # estimate a value that's enough to cover your + # dataset. Try increasing this value if you hit an + # error for maximum tokens. It must be 'auto' or + # an integer value between 1 and 6. + rope_scaling_factor: auto generate: num_records: 1000