Skip to content

Commit f7f7d83

Browse files
author
lenarsaitov
committed
Add time.sleep(10) on HTTP 429 responses to back off from rate limiting, and refactor modules into per-type packages
1 parent 92580c1 commit f7f7d83

File tree

13 files changed

+30
-15
lines changed

13 files changed

+30
-15
lines changed

cianparser/base.py renamed to cianparser/base_list.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
class BaseListPageParser:
88
def __init__(self,
99
session,
10-
deal_type: str, rent_period_type, location_name: str,
10+
accommodation_type: str, deal_type: str, rent_period_type, location_name: str,
1111
with_saving_csv=False, with_extra_data=False,
1212
object_type=None, additional_settings=None):
13-
self.accommodation_type = "suburban"
13+
self.accommodation_type = accommodation_type
1414
self.session = session
1515
self.deal_type = deal_type
1616
self.rent_period_type = rent_period_type

cianparser/cianparser.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
import cloudscraper
2+
import time
23

34
from cianparser.constants import CITIES, METRO_STATIONS, DEAL_TYPES, OBJECT_SUBURBAN_TYPES
45
from cianparser.url_builder import URLBuilder
5-
from cianparser.flat_list import FlatListPageParser
6-
from cianparser.suburban_list import SuburbanListPageParser
7-
from cianparser.newobject_list import NewObjectListParser
86
from cianparser.proxy_pool import ProxyPool
7+
from cianparser.flat.list import FlatListPageParser
8+
from cianparser.suburban.list import SuburbanListPageParser
9+
from cianparser.newobject.list import NewObjectListParser
910

1011

1112
def list_locations():
@@ -42,14 +43,16 @@ def __set_proxy__(self, url_list):
4243
if available_proxy is not None:
4344
self.__session__.proxies = {"https": available_proxy}
4445

45-
def __load_list_page__(self, url_list_format, page_number):
46+
def __load_list_page__(self, url_list_format, page_number, attempt_number_exception):
4647
url_list = url_list_format.format(page_number)
4748
self.__set_proxy__(url_list)
4849

49-
if page_number == self.__parser__.start_page:
50+
if page_number == self.__parser__.start_page and attempt_number_exception == 0:
5051
print(f"The page from which the collection of information begins: \n {url_list}")
5152

5253
res = self.__session__.get(url=url_list)
54+
if res.status_code == 429:
55+
time.sleep(10)
5356
res.raise_for_status()
5457

5558
return res.text
@@ -70,7 +73,7 @@ def __run__(self, url_list_format: str):
7073
while attempt_number_exception < 3 and not page_parsed:
7174
try:
7275
(page_parsed, attempt_number, end_all_parsing) = self.__parser__.parse_list_offers_page(
73-
html=self.__load_list_page__(url_list_format=url_list_format, page_number=page_number),
76+
html=self.__load_list_page__(url_list_format=url_list_format, page_number=page_number, attempt_number_exception=attempt_number_exception),
7477
page_number=page_number,
7578
count_of_pages=self.__parser__.end_page + 1 - self.__parser__.start_page,
7679
attempt_number=attempt_number_exception)
@@ -105,6 +108,7 @@ def get_flats(self, deal_type: str, rooms, with_saving_csv=False, with_extra_dat
105108
deal_type, rent_period_type = __define_deal_type__(deal_type)
106109
self.__parser__ = FlatListPageParser(
107110
session=self.__session__,
111+
accommodation_type="flat",
108112
deal_type=deal_type,
109113
rent_period_type=rent_period_type,
110114
location_name=self.__location_name__,
@@ -137,6 +141,7 @@ def get_suburban(self, suburban_type: str, deal_type: str, with_saving_csv=False
137141
deal_type, rent_period_type = __define_deal_type__(deal_type)
138142
self.__parser__ = SuburbanListPageParser(
139143
session=self.__session__,
144+
accommodation_type="suburban",
140145
deal_type=deal_type,
141146
rent_period_type=rent_period_type,
142147
location_name=self.__location_name__,
File renamed without changes.

cianparser/flat_list.py renamed to cianparser/flat/list.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66

77
from cianparser.constants import FILE_NAME_FLAT_FORMAT
88
from cianparser.helpers import union_dicts, define_author, define_location_data, define_specification_data, define_deal_url_id, define_price_data
9-
from cianparser.flat import FlatPageParser
10-
from cianparser.base import BaseListPageParser
9+
from cianparser.flat.page import FlatPageParser
10+
from cianparser.base_list import BaseListPageParser
1111

1212

1313
class FlatListPageParser(BaseListPageParser):

cianparser/flat.py renamed to cianparser/flat/page.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import bs4
22
import re
3+
import time
34

45

56
class FlatPageParser:
@@ -9,6 +10,8 @@ def __init__(self, session, url):
910

1011
def __load_page__(self):
1112
res = self.session.get(self.url)
13+
if res.status_code == 429:
14+
time.sleep(10)
1215
res.raise_for_status()
1316
self.offer_page_html = res.text
1417
self.offer_page_soup = bs4.BeautifulSoup(self.offer_page_html, 'html.parser')

cianparser/newobject_list.py renamed to cianparser/newobject/list.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from cianparser.constants import FILE_NAME_NEWOBJECT_FORMAT
1111
from cianparser.helpers import union_dicts
12-
from cianparser.newobject import NewObjectPageParser
12+
from cianparser.newobject.page import NewObjectPageParser
1313

1414

1515
class NewObjectListParser:

cianparser/newobject.py renamed to cianparser/newobject/page.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import bs4
22
import re
3+
import time
34

45

56
class NewObjectPageParser:
@@ -9,6 +10,8 @@ def __init__(self, session, url):
910

1011
def __load_page__(self):
1112
res = self.session.get(self.url)
13+
if res.status_code == 429:
14+
time.sleep(10)
1215
res.raise_for_status()
1316
self.offer_page_html = res.text
1417
self.offer_page_soup = bs4.BeautifulSoup(self.offer_page_html, 'html.parser')

cianparser/suburban_list.py renamed to cianparser/suburban/list.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66

77
from cianparser.constants import FILE_NAME_SUBURBAN_FORMAT
88
from cianparser.helpers import union_dicts, define_author, parse_location_data, define_price_data, define_deal_url_id
9-
from cianparser.suburban import SuburbanPageParser
10-
from cianparser.base import BaseListPageParser
9+
from cianparser.suburban.page import SuburbanPageParser
10+
from cianparser.base_list import BaseListPageParser
1111

1212

1313
class SuburbanListPageParser(BaseListPageParser):

cianparser/suburban.py renamed to cianparser/suburban/page.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import time
2+
13
import bs4
24

35

@@ -8,6 +10,8 @@ def __init__(self, session, url):
810

911
def __load_page__(self):
1012
res = self.session.get(self.url)
13+
if res.status_code == 429:
14+
time.sleep(10)
1115
res.raise_for_status()
1216
self.offer_page_html = res.text
1317
self.offer_page_soup = bs4.BeautifulSoup(self.offer_page_html, 'html.parser')

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = cianparser
3-
version = 1.0.1
3+
version = 1.0.2
44
description = Parser information from Cian website
55
url = https://github.com/lenarsaitov/cianparser
66
author = Lenar Saitov

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
setup(
88
name='cianparser',
9-
version='1.0.1',
9+
version='1.0.2',
1010
description='Parser information from Cian website',
1111
url='https://github.com/lenarsaitov/cianparser',
1212
author='Lenar Saitov',

0 commit comments

Comments
 (0)