Skip to content

Commit 5d1fe68

Browse files
authored
Merge branch 'pre/beta' into temp
2 parents dcef172 + bd2afef commit 5d1fe68

File tree

173 files changed

+3794
-803
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

173 files changed

+3794
-803
lines changed

CHANGELOG.md

Lines changed: 233 additions & 2 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@ Additional dependencies can be added while installing the library:
3838

3939
- <b>More Language Models</b>: additional language models are installed, such as Fireworks, Groq, Anthropic, Hugging Face, and Nvidia AI Endpoints.
4040

41-
```bash
42-
pip install scrapegraphai[other-language-models]
43-
```
41+
42+
This group allows you to use additional language models like Fireworks, Groq, Anthropic, Together AI, Hugging Face, and Nvidia AI Endpoints.
43+
```bash
44+
pip install scrapegraphai[other-language-models]
4445

4546
- <b>Semantic Options</b>: this group includes tools for advanced semantic processing, such as Graphviz.
4647

@@ -58,6 +59,13 @@ Additional dependencies can be added while installing the library:
5859

5960

6061

62+
### Installing "More Browser Options"
63+
64+
This group includes an OCR scraper for websites.
65+
```bash
66+
pip install scrapegraphai[screenshot_scraper]
67+
```
68+
6169
## 💻 Usage
6270
There are multiple standard scraping pipelines that can be used to extract information from a website (or local file).
6371

docs/chinese.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ from scrapegraphai.graphs import SpeechGraph
133133
graph_config = {
134134
"llm": {
135135
"api_key": "OPENAI_API_KEY",
136-
"model": "gpt-3.5-turbo",
136+
"model": "openai/gpt-3.5-turbo",
137137
},
138138
"tts_model": {
139139
"api_key": "OPENAI_API_KEY",

docs/japanese.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ from scrapegraphai.graphs import SpeechGraph
133133
graph_config = {
134134
"llm": {
135135
"api_key": "OPENAI_API_KEY",
136-
"model": "gpt-3.5-turbo",
136+
"model": "openai/gpt-3.5-turbo",
137137
},
138138
"tts_model": {
139139
"api_key": "OPENAI_API_KEY",

docs/korean.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ from scrapegraphai.graphs import SpeechGraph
132132
graph_config = {
133133
"llm": {
134134
"api_key": "OPENAI_API_KEY",
135-
"model": "gpt-3.5-turbo",
135+
"model": "openai/gpt-3.5-turbo",
136136
},
137137
"tts_model": {
138138
"api_key": "OPENAI_API_KEY",

docs/russian.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ from scrapegraphai.graphs import SpeechGraph
138138
graph_config = {
139139
"llm": {
140140
"api_key": "OPENAI_API_KEY",
141-
"model": "gpt-3.5-turbo",
141+
"model": "openai/gpt-3.5-turbo",
142142
},
143143
"tts_model": {
144144
"api_key": "OPENAI_API_KEY",

docs/source/getting_started/examples.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ OpenAI models
2222
graph_config = {
2323
"llm": {
2424
"api_key": openai_key,
25-
"model": "gpt-3.5-turbo",
25+
"model": "openai/gpt-3.5-turbo",
2626
},
2727
}
2828

examples/anthropic/custom_graph_haiku.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040

4141
fetch_node = FetchNode(
4242
input="url | local_dir",
43-
output=["doc", "link_urls", "img_urls"],
43+
output=["doc"],
4444
node_config={
4545
"verbose": True,
4646
"headless": True,
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""
2+
Basic example of scraping pipeline using SmartScraper while setting an API rate limit.
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import SmartScraperGraph
8+
from scrapegraphai.utils import prettify_exec_info
9+
10+
11+
# required environment variables in .env
12+
# ANTHROPIC_API_KEY
13+
load_dotenv()
14+
15+
# ************************************************
16+
# Create the SmartScraperGraph instance and run it
17+
# ************************************************
18+
19+
graph_config = {
20+
"llm": {
21+
"api_key": os.getenv("ANTHROPIC_API_KEY"),
22+
"model": "anthropic/claude-3-haiku-20240307",
23+
"rate_limit": {
24+
"requests_per_second": 1
25+
}
26+
},
27+
}
28+
29+
smart_scraper_graph = SmartScraperGraph(
30+
prompt="""Don't say anything else. Output JSON only. List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
31+
event_end_date, event_end_time, location, event_mode, event_category,
32+
third_party_redirect, no_of_days,
33+
time_in_hours, hosted_or_attending, refreshments_type,
34+
registration_available, registration_link""",
35+
# also accepts a string with the already downloaded HTML code
36+
source="https://www.hmhco.com/event",
37+
config=graph_config
38+
)
39+
40+
result = smart_scraper_graph.run()
41+
print(result)
42+
43+
# ************************************************
44+
# Get graph execution info
45+
# ************************************************
46+
47+
graph_exec_info = smart_scraper_graph.get_execution_info()
48+
print(prettify_exec_info(graph_exec_info))
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""
2+
Basic example of a scraping pipeline using SmartScraperMultiConcatGraph
3+
"""
4+
5+
import os
6+
import json
7+
from dotenv import load_dotenv
8+
from scrapegraphai.graphs import SmartScraperMultiConcatGraph
9+
10+
load_dotenv()
11+
12+
# ************************************************
13+
# Define the configuration for the graph
14+
# ************************************************
15+
16+
graph_config = {
17+
"llm": {
18+
"api_key": os.getenv("ANTHROPIC_API_KEY"),
19+
"model": "anthropic/claude-3-haiku-20240307",
20+
},
21+
}
22+
23+
24+
# *******************************************************
25+
# Create the SmartScraperMultiConcatGraph instance and run it
26+
# *******************************************************
27+
28+
multiple_search_graph = SmartScraperMultiConcatGraph(
29+
prompt="Who is Marco Perini?",
30+
source= [
31+
"https://perinim.github.io/",
32+
"https://perinim.github.io/cv/"
33+
],
34+
schema=None,
35+
config=graph_config
36+
)
37+
38+
result = multiple_search_graph.run()
39+
print(json.dumps(result, indent=4))

0 commit comments

Comments
 (0)