5 files changed: +1024 -1 lines changed
Original file line number | Diff line number | Diff line change
"""
Basic example of a scraping pipeline using SmartScraper.
"""
import json
import os

from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

# Must run before the configuration below reads SCRAPEGRAPH_API_KEY.
load_dotenv()

# ------------------------------------------------------------------
# Graph configuration
# ------------------------------------------------------------------
# NOTE(review): "api_key" is None when SCRAPEGRAPH_API_KEY is not set;
# how the graph reacts to that is not visible here -- confirm.
graph_config = {
    "llm": {
        "model": "scrapegraphai/smart-scraper",
        "api_key": os.environ.get("SCRAPEGRAPH_API_KEY"),
    },
    "verbose": True,
    "headless": False,
}

# ------------------------------------------------------------------
# Build the SmartScraperGraph and execute it
# ------------------------------------------------------------------
smart_scraper_graph = SmartScraperGraph(
    config=graph_config,
    source="https://www.wired.com",
    prompt="Extract me all the articles",
)

result = smart_scraper_graph.run()
print(json.dumps(result, indent=4))

# ------------------------------------------------------------------
# Report execution details for the run
# ------------------------------------------------------------------
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Original file line number Diff line number Diff line change @@ -43,7 +43,8 @@ dependencies = [
43
43
" transformers>=4.44.2" ,
44
44
" googlesearch-python>=1.2.5" ,
45
45
" simpleeval>=1.0.0" ,
46
- " async_timeout>=4.0.3"
46
+ " async_timeout>=4.0.3" ,
47
+ " scrapegraph-py>=0.0.4"
47
48
]
48
49
49
50
license = " MIT"
You can’t perform that action at this time.
0 commit comments