Skip to content

Commit 314306f

Browse files
committed
updated webUrlscraper.py
1 parent cdf888d commit 314306f

File tree

1 file changed

+9
-10
lines changed

1 file changed

+9
-10
lines changed

automaticwebsiteurlscraper.py/webUrlscraper.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,28 +10,27 @@
1010

1111
#getting in the website link
1212

13-
Url=input("Enter your Urllink")
13+
Url = input("Enter your Urllink")
1414
try:
1515
#trying to access the page
16-
page=Request(Url,headers={'User-Agent':'Mozilla/5.0'})
17-
page=urlopen(page,context=ctx).read()
16+
page = Request(Url,headers={'User-Agent':'Mozilla/5.0'})
17+
page = urlopen(page,context=ctx).read()
1818
#Using beautifulsoup to read the contents of the page
19-
soup=BeautifulSoup(page,'html.parser')
19+
soup = BeautifulSoup(page,'html.parser')
2020
#finding all the link headers
2121
links = soup.findAll('a')
22-
if(links != None):
22+
if(links is not None):
2323
finalLinks = []
2424
#getting actual site links from the header a
2525
for link in links:
2626
if 'href' in str(link):
27-
templist=str(link).split("href")
28-
index1=templist[-1].index("\"")
29-
index2=templist[-1][index1+1:].index("\"")
27+
templist = str(link).split("href")
28+
index1 = templist[-1].index("\"")
29+
index2 = templist[-1][index1 + 1 :].index("\"")
3030
finalLinks.append(templist[-1][index1:index2+3])
3131
print("Here are your final links")
3232
#printing the final completed list
3333
for i in finalLinks:
3434
print(i)
3535
except Exception as e:
36-
print(str(e))
37-
36+
print(str( e ))

0 commit comments

Comments
 (0)