Skip to content

Commit b25e8e3

Browse files
committed
Merge branch 'pre/beta' into refactoring-tokenization
2 parents cace433 + 38cba96 commit b25e8e3

File tree

258 files changed

+4095
-920
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

258 files changed

+4095
-920
lines changed

CHANGELOG.md

Lines changed: 355 additions & 6 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,26 +32,38 @@ playwright install
3232

3333
**Note**: it is recommended to install the library in a virtual environment to avoid conflicts with other libraries 🐱
3434

35-
By the way, if you want to use the optional modules, you need to install them yourself with the following command:
35+
<details>
36+
<summary><b>Optional Dependencies</b></summary>
37+
Additional dependencies can be added while installing the library:
3638

37-
### Installing "Other Language Models"
39+
- <b>More Language Models</b>: additional language models are installed, such as Fireworks, Groq, Anthropic, Hugging Face, and Nvidia AI Endpoints.
3840

39-
This group allows you to use additional language models like Fireworks, Groq, Anthropic, Hugging Face, and Nvidia AI Endpoints.
41+
42+
This group allows you to use additional language models like Fireworks, Groq, Anthropic, Together AI, Hugging Face, and Nvidia AI Endpoints.
4043
```bash
4144
pip install scrapegraphai[other-language-models]
4245

43-
```
44-
### Installing "More Semantic Options"
46+
- <b>Semantic Options</b>: this group includes tools for advanced semantic processing, such as Graphviz.
47+
48+
```bash
49+
pip install scrapegraphai[more-semantic-options]
50+
```
51+
52+
- <b>Browsers Options</b>: this group includes additional browser management tools/services, such as Browserbase.
53+
54+
```bash
55+
pip install scrapegraphai[more-browser-options]
56+
```
57+
58+
</details>
59+
60+
4561

46-
This group includes tools for advanced semantic processing, such as Graphviz.
47-
```bash
48-
pip install scrapegraphai[more-semantic-options]
49-
```
5062
### Installing "More Browser Options"
5163

52-
This group includes additional browser management options, such as BrowserBase.
64+
This group includes an OCR scraper for websites.
5365
```bash
54-
pip install scrapegraphai[more-browser-options]
66+
pip install scrapegraphai[screenshot_scraper]
5567
```
5668

5769
## 💻 Usage
@@ -68,7 +80,7 @@ from scrapegraphai.graphs import SmartScraperGraph
6880
graph_config = {
6981
"llm": {
7082
"api_key": "YOUR_OPENAI_APIKEY",
71-
"model": "gpt-4o-mini",
83+
"model": "openai/gpt-4o-mini",
7284
},
7385
"verbose": True,
7486
"headless": False,
@@ -128,6 +140,9 @@ Check out also the Docusaurus [here](https://scrapegraph-doc.onrender.com/).
128140

129141
## 🏆 Sponsors
130142
<div style="text-align: center;">
143+
<a href="https://2ly.link/1zaXG">
144+
<img src="https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/browserbase_logo.png" alt="Browserbase" style="width: 10%;">
145+
</a>
131146
<a href="https://2ly.link/1zNiz">
132147
<img src="https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/serp_api_logo.png" alt="SerpAPI" style="width: 10%;">
133148
</a>

docs/assets/browserbase_logo.png

3.02 KB
Loading

docs/source/introduction/overview.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ FAQ
8282
Sponsors
8383
========
8484

85+
.. image:: ../../assets/browserbase_logo.png
86+
:width: 10%
87+
:alt: Browserbase
88+
:target: https://www.browserbase.com/
89+
8590
.. image:: ../../assets/serp_api_logo.png
8691
:width: 10%
8792
:alt: Serp API

examples/anthropic/csv_scraper_graph_multi_haiku.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@
2626
graph_config = {
2727
"llm": {
2828
"api_key": os.getenv("ANTHROPIC_API_KEY"),
29-
"model": "claude-3-haiku-20240307",
30-
"max_tokens": 4000},
29+
"model": "anthropic/claude-3-haiku-20240307",
30+
},
3131
}
3232

3333
# ************************************************

examples/anthropic/csv_scraper_haiku.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,8 @@
3232
graph_config = {
3333
"llm": {
3434
"api_key": os.getenv("ANTHROPIC_API_KEY"),
35-
"model": "claude-3-haiku-20240307",
36-
"max_tokens": 4000
37-
},
35+
"model": "anthropic/claude-3-haiku-20240307",
36+
},
3837
}
3938

4039
# ************************************************

examples/anthropic/custom_graph_haiku.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@
55
import os
66
from dotenv import load_dotenv
77

8-
from langchain_openai import OpenAIEmbeddings
9-
from langchain_openai import ChatOpenAI
8+
from langchain_anthropic import ChatAnthropic
109
from scrapegraphai.graphs import BaseGraph
11-
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
10+
from scrapegraphai.nodes import FetchNode, ParseNode, GenerateAnswerNode, RobotsNode
1211
load_dotenv()
1312

1413
# ************************************************
@@ -19,16 +18,14 @@
1918
"llm": {
2019
"api_key": os.getenv("ANTHROPIC_API_KEY"),
2120
"model": "claude-3-haiku-20240307",
22-
"max_tokens": 4000
23-
},
21+
},
2422
}
2523

2624
# ************************************************
2725
# Define the graph nodes
2826
# ************************************************
2927

30-
llm_model = ChatOpenAI(graph_config["llm"])
31-
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
28+
llm_model = ChatAnthropic(graph_config["llm"])
3229

3330
# define the nodes for the graph
3431
robot_node = RobotsNode(
@@ -43,7 +40,7 @@
4340

4441
fetch_node = FetchNode(
4542
input="url | local_dir",
46-
output=["doc", "link_urls", "img_urls"],
43+
output=["doc"],
4744
node_config={
4845
"verbose": True,
4946
"headless": True,
@@ -76,14 +73,12 @@
7673
robot_node,
7774
fetch_node,
7875
parse_node,
79-
rag_node,
8076
generate_answer_node,
8177
],
8278
edges=[
8379
(robot_node, fetch_node),
8480
(fetch_node, parse_node),
85-
(parse_node, rag_node),
86-
(rag_node, generate_answer_node)
81+
(parse_node, generate_answer_node)
8782
],
8883
entry_point=robot_node
8984
)

examples/anthropic/json_scraper_haiku.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,8 @@
2626
graph_config = {
2727
"llm": {
2828
"api_key": os.getenv("ANTHROPIC_API_KEY"),
29-
"model": "claude-3-haiku-20240307",
30-
"max_tokens": 4000
31-
},
29+
"model": "anthropic/claude-3-haiku-20240307",
30+
},
3231
}
3332

3433
# ************************************************

examples/anthropic/json_scraper_multi_haiku.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@
1111
graph_config = {
1212
"llm": {
1313
"api_key": os.getenv("ANTHROPIC_API_KEY"),
14-
"model": "claude-3-haiku-20240307",
15-
"max_tokens": 4000
16-
},
14+
"model": "anthropic/claude-3-haiku-20240307",
15+
},
1716
}
1817

1918
FILE_NAME = "inputs/example.json"

examples/anthropic/pdf_scraper_graph_haiku.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,8 @@
1414
graph_config = {
1515
"llm": {
1616
"api_key": os.getenv("ANTHROPIC_API_KEY"),
17-
"model": "claude-3-haiku-20240307",
18-
"max_tokens": 4000
19-
},
17+
"model": "anthropic/claude-3-haiku-20240307",
18+
},
2019
}
2120

2221
source = """

0 commit comments

Comments
 (0)