Skip to content

Commit f164561

Browse files
exa docs and python package update (#31307)
Added support for new Exa API features. Updated Exa docs and Python package (langchain-exa).

Description: Added support for new Exa API features in the langchain-exa package:
- Added max_characters option for text content
- Added support for summary and custom summary prompts
- Added livecrawl option with "always", "fallback", "never" settings
- Added "auto" option for search type
- Updated documentation and tests

Dependencies:
- No new dependencies required. Using existing features from exa-py.

Twitter: @theishangoswami

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
1 parent cf1fa27 commit f164561

File tree

7 files changed

+186
-19
lines changed

7 files changed

+186
-19
lines changed

libs/partners/exa/README.md

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,30 @@ results = exa.invoke("What is the capital of France?")
2727
print(results)
2828
```
2929

30+
### Advanced Features
31+
32+
You can use advanced features like text limits, summaries, and live crawling:
33+
34+
```python
35+
from langchain_exa import ExaSearchRetriever, TextContentsOptions
36+
37+
# Create a new instance with advanced options
38+
exa = ExaSearchRetriever(
39+
exa_api_key="YOUR API KEY",
40+
k=20, # Number of results (1-100)
41+
type="auto", # Can be "neural", "keyword", or "auto"
42+
livecrawl="always", # Can be "always", "fallback", or "never"
43+
summary=True, # Get an AI-generated summary of each result
44+
text_contents_options={"max_characters": 3000} # Limit text length
45+
)
46+
47+
# Create a retriever that uses a custom summary prompt
48+
exa_with_custom_summary = ExaSearchRetriever(
49+
exa_api_key="YOUR API KEY",
50+
summary={"query": "generate one line summary in simple words."} # Custom summary prompt
51+
)
52+
```
53+
3054
## Exa Search Results
3155

3256
You can run the ExaSearchResults tool as follows:
@@ -48,6 +72,33 @@ search_results = search_tool._run(
4872
print("Search Results:", search_results)
4973
```
5074

75+
### Advanced Features
76+
77+
You can use advanced features like text limits, summaries, and live crawling:
78+
79+
```python
80+
from langchain_exa import ExaSearchResults
81+
82+
# Initialize the ExaSearchResults tool
83+
search_tool = ExaSearchResults(exa_api_key="YOUR API KEY")
84+
85+
# Perform a search query with advanced options
86+
search_results = search_tool._run(
87+
query="Latest AI research papers",
88+
num_results=10, # Number of results (1-100)
89+
type="auto", # Can be "neural", "keyword", or "auto"
90+
livecrawl="always", # Can be "always", "fallback", or "never"
91+
summary=True, # Get an AI-generated summary of each result
92+
text_contents_options={"max_characters": 2000} # Limit text length
93+
)
94+
95+
# With custom summary prompt
96+
search_results_with_custom_summary = search_tool._run(
97+
query="Latest AI research papers",
98+
summary={"query": "generate one liner"} # Custom summary prompt
99+
)
100+
```
101+
51102
## Exa Find Similar Results
52103

53104
You can run the ExaFindSimilarResults tool as follows:
@@ -67,4 +118,22 @@ similar_results = find_similar_tool._run(
67118
)
68119

69120
print("Similar Results:", similar_results)
121+
```
122+
123+
### Advanced Features
124+
125+
```python
126+
from langchain_exa import ExaFindSimilarResults
127+
128+
# Initialize the ExaFindSimilarResults tool
129+
find_similar_tool = ExaFindSimilarResults(exa_api_key="YOUR API KEY")
130+
131+
# Find similar results with advanced options
132+
similar_results = find_similar_tool._run(
133+
url="http://espn.com",
134+
num_results=10, # Number of results (1-100)
135+
livecrawl="fallback", # Can be "always", "fallback", or "never"
136+
summary=True, # Get an AI-generated summary of each result
137+
text_contents_options={"max_characters": 1500} # Limit text length
138+
)
70139
```

libs/partners/exa/langchain_exa/retrievers.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,16 @@ def _get_metadata(result: Any) -> dict[str, Any]:
2727
metadata["highlights"] = result.highlights
2828
if getattr(result, "highlight_scores"):
2929
metadata["highlight_scores"] = result.highlight_scores
30+
if getattr(result, "summary"):
31+
metadata["summary"] = result.summary
3032
return metadata
3133

3234

3335
class ExaSearchRetriever(BaseRetriever):
3436
"""Exa Search retriever."""
3537

3638
k: int = 10 # num_results
37-
"""The number of search results to return."""
39+
"""The number of search results to return (1 to 100)."""
3840
include_domains: Optional[list[str]] = None
3941
"""A list of domains to include in the search."""
4042
exclude_domains: Optional[list[str]] = None
@@ -50,11 +52,20 @@ class ExaSearchRetriever(BaseRetriever):
5052
use_autoprompt: Optional[bool] = None
5153
"""Whether to use autoprompt for the search."""
5254
type: str = "neural"
53-
"""The type of search, 'keyword' or 'neural'. Default: neural"""
55+
"""The type of search, 'keyword', 'neural', or 'auto'. Default: neural"""
5456
highlights: Optional[Union[HighlightsContentsOptions, bool]] = None
5557
"""Whether to set the page content to the highlights of the results."""
56-
text_contents_options: Union[TextContentsOptions, Literal[True]] = True
57-
"""How to set the page content of the results"""
58+
text_contents_options: Union[TextContentsOptions, dict[str, Any], Literal[True]] = (
59+
True
60+
)
61+
"""How to set the page content of the results. Can be True or a dict with options
62+
like max_characters."""
63+
livecrawl: Optional[Literal["always", "fallback", "never"]] = None
64+
"""Option to crawl live webpages if content is not in the index. Options: "always",
65+
"fallback", "never"."""
66+
summary: Optional[Union[bool, dict[str, str]]] = None
67+
"""Whether to include a summary of the content. Can be a boolean or a dict with a
68+
custom query."""
5869

5970
client: Exa = Field(default=None)
6071
exa_api_key: SecretStr = Field(default=None)
@@ -82,6 +93,9 @@ def _get_relevant_documents(
8293
start_published_date=self.start_published_date,
8394
end_published_date=self.end_published_date,
8495
use_autoprompt=self.use_autoprompt,
96+
livecrawl=self.livecrawl,
97+
summary=self.summary,
98+
type=self.type,
8599
)
86100

87101
results = response.results

libs/partners/exa/langchain_exa/tools.py

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Tool for the Exa Search API."""
22

3-
from typing import Any, Optional, Union
3+
from typing import Any, Literal, Optional, Union
44

55
from exa_py import Exa # type: ignore[untyped-import]
66
from exa_py.api import (
@@ -74,8 +74,10 @@ def validate_environment(cls, values: dict) -> Any:
7474
def _run(
7575
self,
7676
query: str,
77-
num_results: int,
78-
text_contents_options: Optional[Union[TextContentsOptions, bool]] = None,
77+
num_results: int = 10,
78+
text_contents_options: Optional[
79+
Union[TextContentsOptions, dict[str, Any], bool]
80+
] = None,
7981
highlights: Optional[Union[HighlightsContentsOptions, bool]] = None,
8082
include_domains: Optional[list[str]] = None,
8183
exclude_domains: Optional[list[str]] = None,
@@ -84,9 +86,30 @@ def _run(
8486
start_published_date: Optional[str] = None,
8587
end_published_date: Optional[str] = None,
8688
use_autoprompt: Optional[bool] = None,
89+
livecrawl: Optional[Literal["always", "fallback", "never"]] = None,
90+
summary: Optional[Union[bool, dict[str, str]]] = None,
91+
type: Optional[Literal["neural", "keyword", "auto"]] = None,
8792
run_manager: Optional[CallbackManagerForToolRun] = None,
8893
) -> Union[list[dict], str]:
89-
"""Use the tool."""
94+
"""Use the tool.
95+
96+
Args:
97+
query: The search query.
98+
num_results: The number of search results to return (1 to 100). Default: 10
99+
text_contents_options: How to set the page content of the results. Can be True or a dict with options like max_characters.
100+
highlights: Whether to include highlights in the results.
101+
include_domains: A list of domains to include in the search.
102+
exclude_domains: A list of domains to exclude from the search.
103+
start_crawl_date: The start date for the crawl (in YYYY-MM-DD format).
104+
end_crawl_date: The end date for the crawl (in YYYY-MM-DD format).
105+
start_published_date: The start date for when the document was published (in YYYY-MM-DD format).
106+
end_published_date: The end date for when the document was published (in YYYY-MM-DD format).
107+
use_autoprompt: Whether to use autoprompt for the search.
108+
livecrawl: Option to crawl live webpages if content is not in the index. Options: "always", "fallback", "never"
109+
summary: Whether to include a summary of the content. Can be a boolean or a dict with a custom query.
110+
type: The type of search, 'keyword', 'neural', or 'auto'.
111+
run_manager: The run manager for callbacks.
112+
""" # noqa: E501
90113
try:
91114
return self.client.search_and_contents(
92115
query,
@@ -100,6 +123,9 @@ def _run(
100123
start_published_date=start_published_date,
101124
end_published_date=end_published_date,
102125
use_autoprompt=use_autoprompt,
126+
livecrawl=livecrawl,
127+
summary=summary,
128+
type=type,
103129
) # type: ignore
104130
except Exception as e:
105131
return repr(e)
@@ -128,8 +154,10 @@ def validate_environment(cls, values: dict) -> Any:
128154
def _run(
129155
self,
130156
url: str,
131-
num_results: int,
132-
text_contents_options: Optional[Union[TextContentsOptions, bool]] = None,
157+
num_results: int = 10,
158+
text_contents_options: Optional[
159+
Union[TextContentsOptions, dict[str, Any], bool]
160+
] = None,
133161
highlights: Optional[Union[HighlightsContentsOptions, bool]] = None,
134162
include_domains: Optional[list[str]] = None,
135163
exclude_domains: Optional[list[str]] = None,
@@ -139,9 +167,29 @@ def _run(
139167
end_published_date: Optional[str] = None,
140168
exclude_source_domain: Optional[bool] = None,
141169
category: Optional[str] = None,
170+
livecrawl: Optional[Literal["always", "fallback", "never"]] = None,
171+
summary: Optional[Union[bool, dict[str, str]]] = None,
142172
run_manager: Optional[CallbackManagerForToolRun] = None,
143173
) -> Union[list[dict], str]:
144-
"""Use the tool."""
174+
"""Use the tool.
175+
176+
Args:
177+
url: The URL to find similar pages for.
178+
num_results: The number of search results to return (1 to 100). Default: 10
179+
text_contents_options: How to set the page content of the results. Can be True or a dict with options like max_characters.
180+
highlights: Whether to include highlights in the results.
181+
include_domains: A list of domains to include in the search.
182+
exclude_domains: A list of domains to exclude from the search.
183+
start_crawl_date: The start date for the crawl (in YYYY-MM-DD format).
184+
end_crawl_date: The end date for the crawl (in YYYY-MM-DD format).
185+
start_published_date: The start date for when the document was published (in YYYY-MM-DD format).
186+
end_published_date: The end date for when the document was published (in YYYY-MM-DD format).
187+
exclude_source_domain: If True, exclude pages from the same domain as the source URL.
188+
category: Filter for similar pages by category.
189+
livecrawl: Option to crawl live webpages if content is not in the index. Options: "always", "fallback", "never"
190+
summary: Whether to include a summary of the content. Can be a boolean or a dict with a custom query.
191+
run_manager: The run manager for callbacks.
192+
""" # noqa: E501
145193
try:
146194
return self.client.find_similar_and_contents(
147195
url,
@@ -156,6 +204,8 @@ def _run(
156204
end_published_date=end_published_date,
157205
exclude_source_domain=exclude_source_domain,
158206
category=category,
207+
livecrawl=livecrawl,
208+
summary=summary,
159209
) # type: ignore
160210
except Exception as e:
161211
return repr(e)

libs/partners/exa/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ build-backend = "pdm.backend"
66
authors = []
77
license = { text = "MIT" }
88
requires-python = ">=3.9"
9-
dependencies = ["langchain-core<1.0.0,>=0.3.15", "exa-py<2.0.0,>=1.0.8"]
9+
dependencies = ["langchain-core<1.0.0,>=0.3.60", "exa-py<2.0.0,>=1.0.8"]
1010
name = "langchain-exa"
11-
version = "0.2.1"
11+
version = "0.3.0"
1212
description = "An integration package connecting Exa and LangChain"
1313
readme = "README.md"
1414

libs/partners/exa/tests/integration_tests/test_retriever.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,19 @@ def test_exa_retriever_highlights() -> None:
2626
assert isinstance(highlight_scores, list)
2727
assert isinstance(highlights[0], str)
2828
assert isinstance(highlight_scores[0], float)
29+
30+
31+
def test_exa_retriever_advanced_features() -> None:
32+
retriever = ExaSearchRetriever(
33+
k=3, text_contents_options={"max_characters": 1000}, summary=True, type="auto"
34+
)
35+
res = retriever.invoke("best time to visit japan")
36+
print(res) # noqa: T201
37+
assert len(res) == 3 # requested k=3
38+
assert isinstance(res, list)
39+
assert isinstance(res[0], Document)
40+
# Verify summary is in metadata
41+
assert "summary" in res[0].metadata
42+
assert isinstance(res[0].metadata["summary"], str)
43+
# Verify text was limited
44+
assert len(res[0].page_content) <= 1000

libs/partners/exa/tests/integration_tests/test_search_tool.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,23 @@ def test_search_tool() -> None:
88
res = tool.invoke({"query": "best time to visit japan", "num_results": 5})
99
print(res) # noqa: T201
1010
assert not isinstance(res, str) # str means error for this tool\
11+
12+
13+
def test_search_tool_advanced_features() -> None:
14+
tool = ExaSearchResults()
15+
res = tool.invoke(
16+
{
17+
"query": "best time to visit japan",
18+
"num_results": 3,
19+
"text_contents_options": {"max_characters": 1000},
20+
"summary": True,
21+
"type": "auto",
22+
}
23+
)
24+
print(res) # noqa: T201
25+
assert not isinstance(res, str) # str means error for this tool
26+
assert len(res.results) == 3
27+
# Verify summary exists
28+
assert hasattr(res.results[0], "summary")
29+
# Verify text was limited
30+
assert len(res.results[0].text) <= 1000

libs/partners/exa/uv.lock

Lines changed: 4 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)