Skip to content

Commit 7b5b418

Browse files
committed
update webUrlscraper.py
1 parent 5ffeeb0 commit 7b5b418

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

automaticwebsiteurlscraper.py/webUrlscraper.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -13,21 +13,21 @@
1313
Url = input("Enter your Urllink")
1414
try:
1515
#trying to access the page
16-
page = Request(Url,headers={'User-Agent':'Mozilla/5.0'})
17-
page = urlopen(page,context=ctx).read()
16+
page = Request(Url, headers = { 'User-Agent':'Mozilla/5.0' } )
17+
page = urlopen(page, context = ctx ).read()
1818
#Using beautifulsoup to read the contents of the page
1919
soup = BeautifulSoup(page,'html.parser')
2020
#finding all the link headers
2121
links = soup.findAll('a')
2222
if(links is not None):
23-
finalLinks = []
23+
finalLinks = [ ]
2424
#getting actual site links from the header a
2525
for link in links:
2626
if 'href' in str(link):
2727
templist = str(link).split("href")
2828
index1 = templist[-1].index("\"")
29-
index2 = templist[-1][index1 + 1 :].index("\"")
30-
finalLinks.append(templist[-1][index1:index2+3])
29+
index2 = templist[-1][index1 + 1 : ].index("\"")
30+
finalLinks.append( templist[-1][ index1 : index2 + 3 ] )
3131
print("Here are your final links")
3232
#printing the final completed list
3333
for i in finalLinks:

0 commit comments

Comments
 (0)