 from urllib.request import urlopen, Request
 from bs4 import BeautifulSoup
 
-ctx = ssl.create_default_context()
+ctx=ssl.create_default_context()
 ctx.check_hostname = False
 ctx.verify_mode = ssl.CERT_NONE
 
-#getting in the website link
-
-Url = input("Enter your Urllink")
-try:
+# getting in the website link
+Url=input("Enter your Urllink")
+try :
     # trying to access the page
-    page = Request(Url, headers = {'User-Agent':'Mozilla/5.0'})
-    page = urlopen(page, context = ctx ).read()
+    page=Request(Url, headers={'User-Agent':'Mozilla/5.0'})
+    page=urlopen(page, context=ctx, ).read()
     # Using beautifulsoup to read the contents of the page
-    soup = BeautifulSoup(page,'html.parser')
+    soup = BeautifulSoup(page, 'html.parser')
     # finding all the link headers
     links = soup.findAll('a')
-    if(links is not None) :
+    if(links is not None) :
         finalLinks = []
         # getting actual site links from the header a
         for link in links :
-            if 'href' in str ( link ):
-                templist = str ( link ).split("href")
-                index1 = templist[ -1 ].index("\"")
-                index2 = templist[ -1 ][ index1 + 1 : ].index( "\"" )
-                finalLinks.append( templist[-1][ index1 : index2 + 3 ] )
+            if 'href' in str(link):
+                templist = str(link).split("href")
+                index1 = templist[-1].index("\"")
+                index2 = templist[-1][index1 + 1 : ].index( "\"" )
+                finalLinks.append(templist[-1][ index1 : index2 + 3 ])
         print("Here are your final links")
         # printing the final completed list
         for i in finalLinks:
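Side note on the `href` handling this diff touches: splitting the tag's string form on `"href"` and slicing between quote indexes is fragile, since it only works when the attribute sits exactly where the index arithmetic expects. BeautifulSoup already exposes attributes on each tag, so the same step can be done with `link.get('href')`. A minimal sketch of the whole script along those lines, under the assumption that `import ssl` appears in the unshown lines above this hunk (variable names and the error message here are illustrative, not part of this PR):

```python
import ssl
from urllib.request import urlopen, Request

from bs4 import BeautifulSoup

# Unverified SSL context, mirroring the script in this diff
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

url = input("Enter your URL link: ")
try:
    # Fetch the page with a browser-like User-Agent header
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    page = urlopen(req, context=ctx).read()

    # Parse the HTML and pull the href attribute off each <a> tag;
    # find_all is the current name, findAll is the legacy alias
    soup = BeautifulSoup(page, 'html.parser')
    final_links = [a.get('href') for a in soup.find_all('a') if a.get('href')]

    print("Here are your final links")
    for link in final_links:
        print(link)
except Exception as exc:
    print("Could not fetch the page:", exc)
```

This avoids the quote-index arithmetic entirely and also picks up `href` values written with single quotes, which the string-splitting version would mis-slice.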