Commit 59af73c

committed: update .py
1 parent 4ea3dfa commit 59af73c

File tree

1 file changed: 13 additions, 14 deletions

automaticwebsiteurlscraper.py/webUrlscraper.py

Lines changed: 13 additions & 14 deletions
@@ -5,30 +5,29 @@
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup

-ctx = ssl.create_default_context()
+ctx=ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

-#getting in the website link
-
-Url = input("Enter your Urllink")
-try:
+# getting in the website link
+Url=input("Enter your Urllink")
+try :
    # trying to access the page
-    page = Request(Url, headers = {'User-Agent':'Mozilla/5.0'})
-    page = urlopen(page, context = ctx ).read()
+    page=Request(Url, headers={'User-Agent':'Mozilla/5.0'})
+    page=urlopen(page, context=ctx, ).read()
    # Using beautifulsoup to read the contents of the page
-    soup = BeautifulSoup(page,'html.parser')
+    soup = BeautifulSoup(page, 'html.parser')
    # finding all the link headers
    links = soup.findAll('a')
-    if(links is not None) :
+    if(links is not None) :
        finalLinks = []
        # getting actual site links from the header a
        for link in links :
-            if 'href' in str ( link ):
-                templist = str ( link ).split("href")
-                index1 = templist[ -1 ].index("\"")
-                index2 = templist[ -1 ][ index1 + 1 : ].index( "\"" )
-                finalLinks.append( templist[-1][ index1 : index2 + 3 ] )
+            if 'href' in str(link):
+                templist = str(link).split("href")
+                index1 = templist[-1].index("\"")
+                index2 = templist[-1][index1 + 1 : ].index( "\"" )
+                finalLinks.append(templist[-1][ index1 : index2 + 3 ])
        print("Here are your final links")
        # printing the final completed list
        for i in finalLinks:
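For reference, the script above pulls href values by splitting the stringified <a> tags on "href" and indexing the surrounding quotes. Below is a minimal runnable sketch of the same extraction using BeautifulSoup's attribute access instead; it is not part of this commit, and the prompt text and variable names are illustrative only.

# Sketch only, not part of the commit: same fetch, but reading the href
# attribute directly from each <a> tag instead of splitting str(link).
import ssl
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup

ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE   # certificate verification disabled, as in the script

url = input("Enter your Url link: ")   # illustrative prompt
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
page = urlopen(req, context=ctx).read()
soup = BeautifulSoup(page, 'html.parser')

# link.get('href') returns the attribute value, or None when the tag has no href,
# so no manual quote indexing is needed.
final_links = [a.get('href') for a in soup.find_all('a') if a.get('href')]

print("Here are your final links")
for link in final_links:
    print(link)

Reading the attribute via get('href') sidesteps the index arithmetic in the committed version and naturally skips anchors that have no href at all.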

0 commit comments
