Skip to content

Commit d248646

Browse files
committed
Merge branch 'pre/beta' into screenshot_scraper
2 parents fee77d1 + d617750 commit d248646

17 files changed

+209
-85
lines changed

CHANGELOG.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
## [1.14.0-beta.11](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.14.0-beta.10...v1.14.0-beta.11) (2024-08-19)
2+
3+
4+
### Features
5+
6+
* add structured output format ([7d2fc67](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/7d2fc672c8c3c05b0f0beac46316ce16c16bcd02))
7+
* **GenerateAnswerNode:** built-in structured output through LangChain ([d29338b](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/d29338b7c2ef0b13535a2e4edae4a4aab08f1825))
8+
9+
10+
### Bug Fixes
11+
12+
* **ParseNode:** leave room for LLM reply in context window ([683bf57](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/683bf57d895d8f6847fdd64e8936ffa1aa91926a))
13+
14+
## [1.14.0-beta.10](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.14.0-beta.9...v1.14.0-beta.10) (2024-08-19)
15+
16+
17+
### Features
18+
19+
* Implemented a filter logic in search_link_node.py ([08e9d9d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/08e9d9d6a09f450a9f512ac2789287819ced9641))
20+
121
## [1.14.0-beta.9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.14.0-beta.8...v1.14.0-beta.9) (2024-08-17)
222

323

examples/anthropic/search_graph_schema_haiku.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,12 @@
33
"""
44

55
import os
6+
from typing import List
67
from dotenv import load_dotenv
7-
load_dotenv()
8-
8+
from pydantic import BaseModel, Field
99
from scrapegraphai.graphs import SearchGraph
1010

11-
from pydantic import BaseModel, Field
12-
from typing import List
11+
load_dotenv()
1312

1413
# ************************************************
1514
# Define the output schema for the graph

examples/azure/smart_scraper_schema_azure.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
Basic example of scraping pipeline using SmartScraper with schema
33
"""
44

5-
import os, json
5+
import os
6+
import json
67
from typing import List
78
from pydantic import BaseModel, Field
89
from dotenv import load_dotenv

examples/local_models/search_link_graph_ollama.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,26 @@
99

1010
graph_config = {
1111
"llm": {
12-
"model": "ollama/llama3",
12+
"model": "ollama/llama3.1:8b",
1313
"temperature": 0,
1414
"format": "json", # Ollama needs the format to be specified explicitly
1515
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
1616
},
1717

1818
"verbose": True,
19-
"headless": False
19+
"headless": False,
20+
"filter_config": {
21+
"diff_domain_filter": True,
22+
# "img_exts": ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.webp', '.ico'],
23+
# "lang_indicators": ['lang=', '/fr', '/pt', '/es', '/de', '/jp', '/it'],
24+
# "irrelevant_keywords": [
25+
# '/login', '/signup', '/register', '/contact', 'facebook.com', 'twitter.com',
26+
# 'linkedin.com', 'instagram.com', '.js', '.css', '/wp-content/', '/wp-admin/',
27+
# '/wp-includes/', '/wp-json/', '/wp-comments-post.php', ';amp', '/about',
28+
# '/careers', '/jobs', '/privacy', '/terms', '/legal', '/faq', '/help',
29+
# '.pdf', '.zip', '/news', '/files', '/downloads'
30+
# ]
31+
},
2032
}
2133

2234
# ************************************************

examples/local_models/smart_scraper_schema_ollama.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ class Projects(BaseModel):
1919

2020
graph_config = {
2121
"llm": {
22-
"model": "ollama/llama3",
22+
"model": "ollama/llama3.1",
2323
"temperature": 0,
2424
"format": "json", # Ollama needs the format to be specified explicitly
2525
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily

examples/openai/smart_scraper_schema_openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ class Projects(BaseModel):
3030
graph_config = {
3131
"llm": {
3232
"api_key":openai_key,
33-
"model": "gpt-4o",
33+
"model": "gpt-4o-mini",
3434
},
3535
"verbose": True,
3636
"headless": False,

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "scrapegraphai"
33

44

5-
version = "1.14.0b9"
5+
version = "1.14.0b11"
66

77

88
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
@@ -14,12 +14,12 @@ authors = [
1414
]
1515

1616
dependencies = [
17-
"langchain>=0.2.10",
17+
"langchain>=0.2.14",
1818
"langchain-fireworks>=0.1.3",
1919
"langchain_community>=0.2.9",
2020
"langchain-google-genai>=1.0.7",
2121
"langchain-google-vertexai>=1.0.7",
22-
"langchain-openai>=0.1.17",
22+
"langchain-openai>=0.1.22",
2323
"langchain-groq>=0.1.3",
2424
"langchain-aws>=0.1.3",
2525
"langchain-anthropic>=0.1.11",

requirements-dev.lock

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ jsonschema-specifications==2023.12.1
255255
# via jsonschema
256256
kiwisolver==1.4.5
257257
# via matplotlib
258-
langchain==0.2.12
258+
langchain==0.2.14
259259
# via langchain-community
260260
# via scrapegraphai
261261
langchain-anthropic==0.1.22
@@ -264,7 +264,7 @@ langchain-aws==0.1.16
264264
# via scrapegraphai
265265
langchain-community==0.2.11
266266
# via scrapegraphai
267-
langchain-core==0.2.29
267+
langchain-core==0.2.33
268268
# via langchain
269269
# via langchain-anthropic
270270
# via langchain-aws
@@ -292,7 +292,7 @@ langchain-mistralai==0.1.12
292292
# via scrapegraphai
293293
langchain-nvidia-ai-endpoints==0.2.1
294294
# via scrapegraphai
295-
langchain-openai==0.1.21
295+
langchain-openai==0.1.22
296296
# via scrapegraphai
297297
langchain-text-splitters==0.2.2
298298
# via langchain

requirements.lock

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ jinja2==3.1.4
178178
# via torch
179179
jiter==0.5.0
180180
# via anthropic
181+
# via openai
181182
jmespath==1.0.1
182183
# via boto3
183184
# via botocore
@@ -187,7 +188,7 @@ jsonpatch==1.33
187188
# via langchain-core
188189
jsonpointer==3.0.0
189190
# via jsonpatch
190-
langchain==0.2.11
191+
langchain==0.2.14
191192
# via langchain-community
192193
# via scrapegraphai
193194
langchain-anthropic==0.1.20
@@ -196,7 +197,7 @@ langchain-aws==0.1.12
196197
# via scrapegraphai
197198
langchain-community==0.2.10
198199
# via scrapegraphai
199-
langchain-core==0.2.28
200+
langchain-core==0.2.33
200201
# via langchain
201202
# via langchain-anthropic
202203
# via langchain-aws
@@ -224,7 +225,7 @@ langchain-mistralai==0.1.12
224225
# via scrapegraphai
225226
langchain-nvidia-ai-endpoints==0.1.7
226227
# via scrapegraphai
227-
langchain-openai==0.1.17
228+
langchain-openai==0.1.22
228229
# via scrapegraphai
229230
langchain-text-splitters==0.2.2
230231
# via langchain
@@ -264,7 +265,7 @@ numpy==1.26.4
264265
# via sentence-transformers
265266
# via shapely
266267
# via transformers
267-
openai==1.37.0
268+
openai==1.41.0
268269
# via langchain-fireworks
269270
# via langchain-openai
270271
orjson==3.10.6

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
langchain>=0.2.10
1+
langchain>=0.2.14
22
langchain-fireworks>=0.1.3
33
langchain_community>=0.2.9
44
langchain-google-genai>=1.0.7
55
langchain-google-vertexai>=1.0.7
6-
langchain-openai>=0.1.17
6+
langchain-openai>=0.1.22
77
langchain-groq>=0.1.3
88
langchain-aws>=0.1.3
99
langchain-anthropic>=0.1.11

0 commit comments

Comments
 (0)