Skip to content

Commit 926db71

Browse files
authored
Merge pull request #35 from lenarsaitov/significant-refactor-code
significant refactor code
2 parents 6da8f8f + fb699cd commit 926db71

20 files changed

+1148
-856
lines changed

README.md

Lines changed: 135 additions & 84 deletions
Large diffs are not rendered by default.

cianparser/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from .cianparser import parse, list_cities, list_metro_stations
1+
from .cianparser import CianParser, list_locations, list_metro_stations
22

33
__author__ = "lenarsaitov"
44
__mail__ = "lenarsaitov1@yandex.ru"

cianparser/base.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import math
2+
import csv
3+
import pathlib
4+
from datetime import datetime
5+
import transliterate
6+
7+
from cianparser.constants import FILE_NAME_BASE, SPECIFIC_FIELDS_FOR_RENT_LONG, SPECIFIC_FIELDS_FOR_RENT_SHORT, SPECIFIC_FIELDS_FOR_SALE
8+
9+
10+
class BaseListPageParser:
    """Shared state and helpers for parsers that walk paginated offer lists.

    The class keeps the accumulated offers in ``self.result``, tracks running
    statistics used by the progress line, and knows how to dump the collected
    rows to a ``;``-delimited CSV file.
    """

    def __init__(self,
                 session,
                 deal_type: str, rent_period_type, location_name: str,
                 with_saving_csv=False, with_extra_data=False,
                 object_type=None, additional_settings=None):
        """Store the parsing configuration and initialise empty result state.

        Args:
            session: Object stored as ``self.session``; it is not used by the
                base class itself.
            deal_type: Compared against ``"sale"`` and ``"rent"`` by the
                ``is_*`` helpers.
            rent_period_type: Compared against ``4`` (long rent) and ``2``
                (short rent) by the ``is_rent_*`` helpers.
            location_name: Transliterated and embedded in the CSV file name.
            with_saving_csv: Stored flag; not read by the base class.
            with_extra_data: Stored flag; not read by the base class.
            object_type: Stored as-is; not read by the base class.
            additional_settings: Optional mapping. The keys ``"start_page"``
                and ``"end_page"`` override the defaults ``1`` and ``100``.
        """
        self.accommodation_type = "suburban"
        self.session = session
        self.deal_type = deal_type
        self.rent_period_type = rent_period_type
        self.location_name = location_name
        self.with_saving_csv = with_saving_csv
        self.with_extra_data = with_extra_data
        self.additional_settings = additional_settings
        self.object_type = object_type

        self.result = []
        self.result_set = set()
        self.average_price = 0
        self.count_parsed_offers = 0

        settings = {} if additional_settings is None else additional_settings
        self.start_page = settings.get("start_page", 1)
        self.end_page = settings.get("end_page", 100)

        # Must run last: the file name embeds start_page and end_page.
        self.file_path = self.build_file_path()

    def is_sale(self) -> bool:
        """Return True when the deal type is ``"sale"``."""
        return self.deal_type == "sale"

    def is_rent_long(self) -> bool:
        """Return True for a ``"rent"`` deal whose period type equals 4."""
        return self.deal_type == "rent" and self.rent_period_type == 4

    def is_rent_short(self) -> bool:
        """Return True for a ``"rent"`` deal whose period type equals 2."""
        return self.deal_type == "rent" and self.rent_period_type == 2

    def build_file_path(self):
        """Build the CSV output path inside the current working directory.

        The name is produced from ``FILE_NAME_BASE`` and includes the
        accommodation type, deal type, page range, the transliterated
        lower-cased location name and a microsecond-resolution timestamp.
        Apostrophes are stripped from the final name.
        """
        now_time = datetime.now().strftime("%d_%b_%Y_%H_%M_%S_%f")
        location = transliterate.translit(self.location_name.lower(), reversed=True)
        file_name = FILE_NAME_BASE.format(
            self.accommodation_type,
            self.deal_type,
            self.start_page,
            self.end_page,
            location,
            now_time,
        )
        return pathlib.Path(pathlib.Path.cwd(), file_name.replace("'", ""))

    def print_parse_progress(self, page_number, count_of_pages, offers, ind):
        """Print a single-line, carriage-return-terminated progress report.

        Args:
            page_number: Number of the list page currently being parsed.
            count_of_pages: Total number of pages planned for this run.
            offers: Sized collection of offers found on the current page.
            ind: Zero-based index of the offer that has just been processed.

        Percentages fall back to 0 when there is nothing to divide by (empty
        page or zero planned pages) instead of raising ZeroDivisionError.
        """
        offers_on_page = len(offers)
        total_planed_offers = offers_on_page * count_of_pages

        page_percent = (
            math.ceil((ind + 1) * 100 / offers_on_page) if offers_on_page else 0
        )
        total_percent = (
            math.ceil(self.count_parsed_offers * 100 / total_planed_offers)
            if total_planed_offers else 0
        )

        progress_bar = "=>" * (ind + 1) + " " * (offers_on_page - ind - 1)
        # Thousands are separated with spaces rather than commas.
        average_price = "{:,}".format(int(self.average_price)).replace(",", " ")

        print(f"\r {page_number - self.start_page + 1}"
              f" | {page_number} page with list: [" + progress_bar + "]"
              + f" {page_percent}" + "%"
              + f" | Count of all parsed: {self.count_parsed_offers}."
              f" Progress ratio: {total_percent} %."
              f" Average price: {average_price} rub",
              end="\r", flush=True)

    def _irrelevant_field_groups(self):
        """Return the field-name collections that do not apply to this deal."""
        if self.is_sale():
            return (SPECIFIC_FIELDS_FOR_RENT_LONG, SPECIFIC_FIELDS_FOR_RENT_SHORT)
        if self.is_rent_long():
            return (SPECIFIC_FIELDS_FOR_RENT_SHORT, SPECIFIC_FIELDS_FOR_SALE)
        if self.is_rent_short():
            return (SPECIFIC_FIELDS_FOR_RENT_LONG, SPECIFIC_FIELDS_FOR_SALE)
        return ()

    def remove_unnecessary_fields(self):
        """Drop deal-specific fields that do not apply from the newest row.

        Only the most recently appended entry of ``self.result`` is touched.
        When no rows have been collected yet the method is a no-op.

        Returns:
            The (possibly modified) ``self.result`` list.
        """
        if not self.result:
            return self.result

        last_offer = self.result[-1]
        for field_group in self._irrelevant_field_groups():
            for not_need_field in field_group:
                last_offer.pop(not_need_field, None)

        return self.result

    def save_results(self):
        """Write all collected rows to ``self.file_path`` as ``;``-separated CSV.

        The header is taken from the keys of the first row. Nothing is written
        when there are no rows, because there is no header to derive.
        """
        if not self.result:
            return

        self.remove_unnecessary_fields()
        keys = self.result[0].keys()

        with open(self.file_path, 'w', newline='', encoding='utf-8') as output_file:
            dict_writer = csv.DictWriter(output_file, keys, delimiter=';')
            dict_writer.writeheader()
            dict_writer.writerows(self.result)

0 commit comments

Comments
 (0)