File tree Expand file tree Collapse file tree 1 file changed +9
-10
lines changed
automaticwebsiteurlscraper.py Expand file tree Collapse file tree 1 file changed +9
-10
lines changed Original file line number Diff line number Diff line change 10
10
11
11
# Ask the user which page to scan. Surrounding whitespace is stripped because
# it is never part of a URL and would make the request fail.
Url = input("Enter your Urllink").strip()
try:
    # Fetch the page, sending a 'Mozilla/5.0' User-Agent header with the
    # request. `ctx` is defined earlier in the file (outside this section);
    # presumably it is the SSL context used for HTTPS -- confirm there.
    page = Request(Url, headers={'User-Agent': 'Mozilla/5.0'})
    page = urlopen(page, context=ctx).read()

    # Parse the downloaded markup with the stdlib-backed HTML parser.
    soup = BeautifulSoup(page, 'html.parser')

    # Read each href straight from the parsed tag instead of splitting the
    # tag's string form on the text "href". The string-splitting approach
    # picked the wrong piece (or raised ValueError and aborted the whole run)
    # whenever "href" also appeared in the link text, in the URL itself, or in
    # a later attribute, and it returned entity-escaped values such as "&amp;".
    # `href=True` keeps only anchors that actually carry an href attribute.
    # The surrounding double quotes are kept so the printed output has the
    # same format as before.
    finalLinks = [
        '"' + link['href'] + '"'
        for link in soup.find_all('a', href=True)
    ]

    print("Here are your final links")
    # Print one link per line.
    for i in finalLinks:
        print(i)
except Exception as e:
    # Top-level boundary of the script: report the failure (bad URL, network
    # error, ...) to the user instead of showing a traceback.
    print(str(e))
You can’t perform that action at this time.
0 commit comments