Fusing both Gte Configs. (#530)

Narsil · web-flow · commit 69714ef026dc · 2025-03-26T10:59:14.000+01:00
diff --git a/backends/candle/src/lib.rs b/backends/candle/src/lib.rs
@@ -59,9 +59,8 @@ enum Config {
     NomicBert(NomicConfig),
     #[allow(dead_code)]
     Mistral(MistralConfig),
+    #[serde(alias = "new")]
     Gte(GTEConfig),
-    #[serde(rename = "new")]
-    GteAlibaba(GTEConfig),
     #[allow(dead_code)]
     Qwen2(Qwen2Config),
     #[serde(rename = "mpnet")]
@@ -224,7 +223,7 @@ impl CandleBackend {
                 "Mistral is only supported on Cuda devices in fp16 with flash attention enabled"
                     .to_string(),
             )),
-            (Config::Gte(config) | Config::GteAlibaba(config), Device::Cpu | Device::Metal(_)) => {
+            (Config::Gte(config), Device::Cpu | Device::Metal(_)) => {
                 tracing::info!("Starting GTE model on {:?}", device);
                 Ok(Box::new(GTEModel::load(vb, &config, model_type).s()?))
             }
@@ -355,7 +354,7 @@ impl CandleBackend {
                 ))
             }
             #[cfg(feature = "cuda")]
-            (Config::Gte(config) | Config::GteAlibaba(config), Device::Cuda(_)) => {
+            (Config::Gte(config), Device::Cuda(_)) => {
                 if dtype != DType::F16
                     || !cfg!(any(feature = "flash-attn", feature = "flash-attn-v1"))
                 {
diff --git a/backends/candle/tests/snapshots/test_gte__snowflake_gte_batch.snap b/backends/candle/tests/snapshots/test_gte__snowflake_gte_batch.snap
@@ -2307,4 +2307,3 @@ expression: embeddings_batch
   - -0.16524515
   - -0.100704014
   - 0.3677737
-
diff --git a/backends/candle/tests/snapshots/test_gte__snowflake_gte_single.snap b/backends/candle/tests/snapshots/test_gte__snowflake_gte_single.snap
@@ -771,4 +771,3 @@ expression: embeddings_single
   - -0.16524515
   - -0.100704014
   - 0.3677737
-