From 7ad7e92e3258e2cd9f9d47deed2d89cbe83cf0df Mon Sep 17 00:00:00 2001
From: Anton Rubin <anton.rubin@eliatra.com>
Date: Mon, 19 May 2025 22:34:17 +0100
Subject: [PATCH 1/2] adding rank_feature dsl query docs

Signed-off-by: Anton Rubin <anton.rubin@eliatra.com>
---
 _query-dsl/specialized/index.md        |   2 +-
 _query-dsl/specialized/rank-feature.md | 547 +++++++++++++++++++++++++
 2 files changed, 548 insertions(+), 1 deletion(-)
 create mode 100644 _query-dsl/specialized/rank-feature.md

diff --git a/_query-dsl/specialized/index.md b/_query-dsl/specialized/index.md
index d28451cfa8d..fd89887e397 100644
--- a/_query-dsl/specialized/index.md
+++ b/_query-dsl/specialized/index.md
@@ -22,7 +22,7 @@ OpenSearch supports the following specialized queries:
 
 - `percolate`: Finds queries (stored as documents) that match the provided document.
 
-- `rank_feature`: Calculates scores based on the values of numeric features. This query can skip non-competitive hits.
+- [`rank_feature`]({{site.url}}{{site.baseurl}}/query-dsl/specialized/rank-feature/): Calculates scores based on the values of numeric features. This query can skip non-competitive hits.
 
 - `script`: Uses a script as a filter.
 
diff --git a/_query-dsl/specialized/rank-feature.md b/_query-dsl/specialized/rank-feature.md
new file mode 100644
index 00000000000..73c13f4460a
--- /dev/null
+++ b/_query-dsl/specialized/rank-feature.md
@@ -0,0 +1,547 @@
+---
+layout: default
+title: Rank feature
+parent: Specialized queries
+nav_order: 75
+---
+
+# Rank feature
+
+Use the `rank_feature` query to boost document scores based on numeric values in the document, such as relevance scores, popularity, or freshness. This query is ideal if you want to fine-tune relevance ranking using numerical features. Unlike [full-text queries]({{site.url}}{{site.baseurl}}/query-dsl/full-text/index/), `rank_feature` focuses solely on a numeric signal, and is most effective when combined with other queries in a compound query like `bool`.
+
+The `rank_feature` query expects the target field to be mapped as a [`rank_feature` field type]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/rank/). This enables internally optimized scoring for fast and efficient boosting.
+
+The score impact depends on the field value and the optional `saturation`, `log` or `sigmoid` function used.
+
+## Parameters
+
+| Parameter               | Required/Optional | Description                                                                                                              |
+| ----------------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------ |
+| `field`                 | Required          | A `rank_feature` or `rank_features` field that contributes to document scoring.|
+| `boost`                 | Optional          | A multiplier applied to the score. Default is `1.0`. Values between 0 and 1 reduce the score; values above 1 amplify it. |
+| `saturation`            | Optional          | Applies a saturation function to the feature value. Boost grows with value but levels off beyond the `pivot`. (Default function if no other function is provided)|
+| `log`                   | Optional          | Uses a logarithmic scoring function based on the field value. Best for large ranges of values.|
+| `sigmoid`               | Optional          | Applies a sigmoid (S-shaped) curve to score impact, controlled by `pivot` and `exponent`.|
+| `positive_score_impact` | Optional          | When `false`, lower values score higher. Useful for features like price where smaller is better. Defined as part of the mapping. (Default is `true`)|
+
+Only one function out of `saturation`, `log`, or `sigmoid` may be used at a time.
+{: .note}
+
+## Create an index with a rank feature field
+
+Define an index with a `rank_feature` field to represent a signal like `popularity`:
+
+```json
+PUT /products
+{
+  "mappings": {
+    "properties": {
+      "title": { "type": "text" },
+      "popularity": { "type": "rank_feature" }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Index example documents
+
+Add sample products with varying popularity values:
+
+```json
+POST /products/_bulk
+{ "index": { "_id": 1 } }
+{ "title": "Wireless Earbuds", "popularity": 1 }
+{ "index": { "_id": 2 } }
+{ "title": "Bluetooth Speaker", "popularity": 10 }
+{ "index": { "_id": 3 } }
+{ "title": "Portable Charger", "popularity": 25 }
+{ "index": { "_id": 4 } }
+{ "title": "Smartwatch", "popularity": 50 }
+{ "index": { "_id": 5 } }
+{ "title": "Noise Cancelling Headphones", "popularity": 100 }
+{ "index": { "_id": 6 } }
+{ "title": "Gaming Laptop", "popularity": 250 }
+{ "index": { "_id": 7 } }
+{ "title": "4K Monitor", "popularity": 500 }
+```
+{% include copy-curl.html %}
+
+## Basic rank feature query
+
+You can boost results based on the `popularity` score using `rank_feature`:
+
+```json
+POST /products/_search
+{
+  "query": {
+    "rank_feature": {
+      "field": "popularity"
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+This query alone does not perform filtering; rather, it scores all documents based on the value of `popularity`. Higher values yield higher scores:
+
+```json
+{
+  ...
+  "hits": {
+    "total": {
+      "value": 7,
+      "relation": "eq"
+    },
+    "max_score": 0.9252834,
+    "hits": [
+      {
+        "_index": "products",
+        "_id": "7",
+        "_score": 0.9252834,
+        "_source": {
+          "title": "4K Monitor",
+          "popularity": 500
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "6",
+        "_score": 0.86095566,
+        "_source": {
+          "title": "Gaming Laptop",
+          "popularity": 250
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "5",
+        "_score": 0.71237755,
+        "_source": {
+          "title": "Noise Cancelling Headphones",
+          "popularity": 100
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "4",
+        "_score": 0.5532503,
+        "_source": {
+          "title": "Smartwatch",
+          "popularity": 50
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "3",
+        "_score": 0.38240916,
+        "_source": {
+          "title": "Portable Charger",
+          "popularity": 25
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "2",
+        "_score": 0.19851118,
+        "_source": {
+          "title": "Bluetooth Speaker",
+          "popularity": 10
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "1",
+        "_score": 0.024169207,
+        "_source": {
+          "title": "Wireless Earbuds",
+          "popularity": 1
+        }
+      }
+    ]
+  }
+}
+```
+
+## Combine with full-text search
+
+To filter relevant results and boost them based on popularity, use the following request:
+
+```json
+POST /products/_search
+{
+  "query": {
+    "bool": {
+      "must": {
+        "match": {
+          "title": "headphones"
+        }
+      },
+      "should": {
+        "rank_feature": {
+          "field": "popularity"
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+This ranks all documents matching "headphones" and boosts those with higher popularity.
+
+## Boost parameter
+
+The `boost` parameter allows you to scale the score contribution of the rank_feature clause. It's especially useful in compound queries such as bool, where you want to control the influence of a feature relative to other conditions.
+
+In the following example, the `bool` query matches documents with the term "headphones" in the `title`, and boosts more popular results with a `rank_feature` clause using a `boost` of `2.0`:
+
+```json
+POST /products/_search
+{
+  "query": {
+    "bool": {
+      "must": {
+        "match": {
+          "title": "headphones"
+        }
+      },
+      "should": {
+        "rank_feature": {
+          "field": "popularity",
+          "boost": 2.0
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+This doubles the contribution of the `rank_feature` score to the overall document score. A `boost` less than `1.0` would down-weight its influence.
+
+## Configure score function
+
+By default, the `rank_feature` query uses a `saturation` function with a `pivot` value derived from the field. You can explicitly control this with the `saturation`, `log` or `sigmoid` functions.
+
+### Saturation function
+
+The `saturation` function is the default scoring method used in `rank_feature` queries. It assigns higher scores to documents with larger feature values, but the increase in score becomes more gradual as the value exceeds a specified pivot. This is useful when you want to give diminishing returns to very large values, for example, boosting `popularity` while avoiding over-rewarding extremely high numbers. The formulae for calculating score is: `value of the rank_feature field / (value of the rank_feature field + pivot)`. The produced score is always between `0` and `1`.
+
+```json
+POST /products/_search
+{
+  "query": {
+    "rank_feature": {
+      "field": "popularity",
+      "saturation": {
+        "pivot": 50
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+The `pivot` defines the point at which scoring growth slows down. Values higher than `pivot` still increase the score, but with diminishing returns, as can be seen in the returned hits:
+
+```json
+{
+  ...
+  "hits": {
+    "total": {
+      "value": 7,
+      "relation": "eq"
+    },
+    "max_score": 0.9090909,
+    "hits": [
+      {
+        "_index": "products",
+        "_id": "7",
+        "_score": 0.9090909,
+        "_source": {
+          "title": "4K Monitor",
+          "popularity": 500
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "6",
+        "_score": 0.8333333,
+        "_source": {
+          "title": "Gaming Laptop",
+          "popularity": 250
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "5",
+        "_score": 0.6666666,
+        "_source": {
+          "title": "Noise Cancelling Headphones",
+          "popularity": 100
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "4",
+        "_score": 0.5,
+        "_source": {
+          "title": "Smartwatch",
+          "popularity": 50
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "3",
+        "_score": 0.3333333,
+        "_source": {
+          "title": "Portable Charger",
+          "popularity": 25
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "2",
+        "_score": 0.16666669,
+        "_source": {
+          "title": "Bluetooth Speaker",
+          "popularity": 10
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "1",
+        "_score": 0.019607842,
+        "_source": {
+          "title": "Wireless Earbuds",
+          "popularity": 1
+        }
+      }
+    ]
+  }
+}
+```
+
+If the pivot is not provided, approximate geometric mean of all rank_feature values in the index is used.
+
+### Log function
+
+The log function is helpful when the range of values in your `rank_feature` field varies significantly. It applies a logarithmic scale to the `score`, which reduces the effect of extremely high values and helps normalize scoring across wide value distributions. This is especially useful when a small difference between low values should be more impactful than a large difference between high values. The score is derived using formulae: `log(scaling_factor + rank_feature field)`, see following example:
+
+```json
+POST /products/_search
+{
+  "query": {
+    "rank_feature": {
+      "field": "popularity",
+      "log": {
+        "scaling_factor": 2
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+In the example dataset, the `popularity` field ranges from `1` to `500`. The `log` function compresses the `score` contribution from large values like `250` and `500`, while still allowing documents with `10` or `25` to have meaningful scores. This is unlike `saturation`, where documents above the pivot rapidly approach the same maximum score.
+
+```json
+{
+  ...
+  "hits": {
+    "total": {
+      "value": 7,
+      "relation": "eq"
+    },
+    "max_score": 6.2186003,
+    "hits": [
+      {
+        "_index": "products",
+        "_id": "7",
+        "_score": 6.2186003,
+        "_source": {
+          "title": "4K Monitor",
+          "popularity": 500
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "6",
+        "_score": 5.529429,
+        "_source": {
+          "title": "Gaming Laptop",
+          "popularity": 250
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "5",
+        "_score": 4.624973,
+        "_source": {
+          "title": "Noise Cancelling Headphones",
+          "popularity": 100
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "4",
+        "_score": 3.9512436,
+        "_source": {
+          "title": "Smartwatch",
+          "popularity": 50
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "3",
+        "_score": 3.295837,
+        "_source": {
+          "title": "Portable Charger",
+          "popularity": 25
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "2",
+        "_score": 2.4849067,
+        "_source": {
+          "title": "Bluetooth Speaker",
+          "popularity": 10
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "1",
+        "_score": 1.0986123,
+        "_source": {
+          "title": "Wireless Earbuds",
+          "popularity": 1
+        }
+      }
+    ]
+  }
+}
+```
+
+### Sigmoid function
+
+The `sigmoid` function provides a smooth, S-shaped scoring curve which is especially useful when you want to control the steepness and midpoint of the scoring impact. The score is derived using formulae: `rank feature field value^exp / (rank feature field value^exp + pivot^exp)`, see following example:
+
+```json
+POST /products/_search
+{
+  "query": {
+    "rank_feature": {
+      "field": "popularity",
+      "sigmoid": {
+        "pivot": 50,
+        "exponent": 0.5
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+* `pivot` defines the value at which the score is 0.5.
+* `exponent` controls how steep the curve is. Higher values result in a sharper transition around the pivot; lower values (such as `0.5` in this example) produce a more gradual curve.
+
+The sigmoid function smoothly boosts scores around the `pivot` (`50` in this case), giving moderate preference to values near the pivot while flattening out both high and low extremes:
+
+```json
+{
+  ...
+  "hits": {
+    "total": {
+      "value": 7,
+      "relation": "eq"
+    },
+    "max_score": 0.7597469,
+    "hits": [
+      {
+        "_index": "products",
+        "_id": "7",
+        "_score": 0.7597469,
+        "_source": {
+          "title": "4K Monitor",
+          "popularity": 500
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "6",
+        "_score": 0.690983,
+        "_source": {
+          "title": "Gaming Laptop",
+          "popularity": 250
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "5",
+        "_score": 0.58578646,
+        "_source": {
+          "title": "Noise Cancelling Headphones",
+          "popularity": 100
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "4",
+        "_score": 0.5,
+        "_source": {
+          "title": "Smartwatch",
+          "popularity": 50
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "3",
+        "_score": 0.41421357,
+        "_source": {
+          "title": "Portable Charger",
+          "popularity": 25
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "2",
+        "_score": 0.309017,
+        "_source": {
+          "title": "Bluetooth Speaker",
+          "popularity": 10
+        }
+      },
+      {
+        "_index": "products",
+        "_id": "1",
+        "_score": 0.12389934,
+        "_source": {
+          "title": "Wireless Earbuds",
+          "popularity": 1
+        }
+      }
+    ]
+  }
+}
+```
+
+### Invert score impact
+
+By default, higher values lead to higher scores. If you want lower values to yield higher scores (e.g., lower prices are more relevant), set `positive_score_impact` to `false` during index creation:
+
+```json
+PUT /products_new
+{
+  "mappings": {
+    "properties": {
+      "popularity": {
+        "type": "rank_feature",
+        "positive_score_impact": false
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}

From 4595b4529cc38a832521643b9cdbf06d54a6baab Mon Sep 17 00:00:00 2001
From: Anton Rubin <anton.rubin@eliatra.com>
Date: Tue, 8 Jul 2025 12:07:50 +0100
Subject: [PATCH 2/2] addressing PR comments

Signed-off-by: Anton Rubin <anton.rubin@eliatra.com>
---
 _query-dsl/specialized/rank-feature.md | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/_query-dsl/specialized/rank-feature.md b/_query-dsl/specialized/rank-feature.md
index 73c13f4460a..b11fb63d6d9 100644
--- a/_query-dsl/specialized/rank-feature.md
+++ b/_query-dsl/specialized/rank-feature.md
@@ -11,7 +11,7 @@ Use the `rank_feature` query to boost document scores based on numeric values in
 
 The `rank_feature` query expects the target field to be mapped as a [`rank_feature` field type]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/rank/). This enables internally optimized scoring for fast and efficient boosting.
 
-The score impact depends on the field value and the optional `saturation`, `log` or `sigmoid` function used.
+The score impact depends on the field value and the optional `saturation`, `log`, or `sigmoid` function used. These functions are applied dynamically at query time to compute the final document score; they do not alter or store any values in the document itself.
 
 ## Parameters
 
@@ -192,9 +192,9 @@ This ranks all documents matching "headphones" and boosts those with higher popu
 
 ## Boost parameter
 
-The `boost` parameter allows you to scale the score contribution of the rank_feature clause. It's especially useful in compound queries such as bool, where you want to control the influence of a feature relative to other conditions.
+The `boost` parameter allows you to scale the score contribution of the `rank_feature` clause. It's especially useful in compound queries such as `bool`, where you want to control how much influence a numeric field (such as popularity, freshness, or relevance score) has on the final document ranking.
 
-In the following example, the `bool` query matches documents with the term "headphones" in the `title`, and boosts more popular results with a `rank_feature` clause using a `boost` of `2.0`:
+In the following example, the `bool` query matches documents with the term "headphones" in the `title` and boosts more popular results using a `rank_feature` clause with a `boost` of `2.0`:
 
 ```json
 POST /products/_search
@@ -226,7 +226,7 @@ By default, the `rank_feature` query uses a `saturation` function with a `pivot`
 
 ### Saturation function
 
-The `saturation` function is the default scoring method used in `rank_feature` queries. It assigns higher scores to documents with larger feature values, but the increase in score becomes more gradual as the value exceeds a specified pivot. This is useful when you want to give diminishing returns to very large values, for example, boosting `popularity` while avoiding over-rewarding extremely high numbers. The formulae for calculating score is: `value of the rank_feature field / (value of the rank_feature field + pivot)`. The produced score is always between `0` and `1`.
+The `saturation` function is the default scoring method used in `rank_feature` queries. It assigns higher scores to documents with larger feature values, but the increase in score becomes more gradual as the value exceeds a specified pivot. This is useful when you want to give diminishing returns to very large values, for example, boosting `popularity` while avoiding over-rewarding extremely high numbers. The formula for calculating the score is `value of the rank_feature field / (value of the rank_feature field + pivot)`. The produced score is always between `0` and `1`. If the pivot is not provided, an approximate geometric mean of all `rank_feature` values in the index is used. The following example uses `saturation` with `pivot` set to `50`:
 
 ```json
 POST /products/_search
@@ -323,8 +323,6 @@ The `pivot` defines the point at which scoring growth slows down. Values higher
 }
 ```
 
-If the pivot is not provided, approximate geometric mean of all rank_feature values in the index is used.
-
 ### Log function
 
 The log function is helpful when the range of values in your `rank_feature` field varies significantly. It applies a logarithmic scale to the `score`, which reduces the effect of extremely high values and helps normalize scoring across wide value distributions. This is especially useful when a small difference between low values should be more impactful than a large difference between high values. The score is derived using formulae: `log(scaling_factor + rank_feature field)`, see following example:
@@ -426,7 +424,7 @@ In the example dataset, the `popularity` field ranges from `1` to `500`. The `lo
 
 ### Sigmoid function
 
-The `sigmoid` function provides a smooth, S-shaped scoring curve which is especially useful when you want to control the steepness and midpoint of the scoring impact. The score is derived using formulae: `rank feature field value^exp / (rank feature field value^exp + pivot^exp)`, see following example:
+The `sigmoid` function provides a smooth, S-shaped scoring curve, which is especially useful when you want to control the steepness and midpoint of the scoring impact. The score is calculated using the formula `value^exponent / (value^exponent + pivot^exponent)`, where `value` is the value of the `rank_feature` field. The following example query uses the `sigmoid` function with a configured `pivot` and `exponent`:
 
 ```json
 POST /products/_search
@@ -529,7 +527,7 @@ The sigmoid function smoothly boosts scores around the `pivot` (`50` in this cas
 
 ### Invert score impact
 
-By default, higher values lead to higher scores. If you want lower values to yield higher scores (e.g., lower prices are more relevant), set `positive_score_impact` to `false` during index creation:
+By default, higher values lead to higher scores. If you want lower values to yield higher scores (for example, lower prices are more relevant), set `positive_score_impact` to `false` during index creation:
 
 ```json
 PUT /products_new