1
+ import math
2
+ import csv
3
+ import pathlib
4
+ from datetime import datetime
5
+ import transliterate
6
+
7
+ from cianparser .constants import FILE_NAME_BASE , SPECIFIC_FIELDS_FOR_RENT_LONG , SPECIFIC_FIELDS_FOR_RENT_SHORT , SPECIFIC_FIELDS_FOR_SALE
8
+
9
+
10
class BaseListPageParser:
    """Shared state and helpers for parsers that walk paginated offer lists.

    The class stores the search parameters, accumulates parsed offers in
    ``self.result`` (a list of dicts), prints a one-line progress indicator
    and can dump the accumulated offers to a ``;``-separated CSV file.

    Attributes set by ``__init__``:
        accommodation_type: fixed to ``"suburban"`` here.
        session, deal_type, rent_period_type, location_name,
        with_saving_csv, with_extra_data, object_type,
        additional_settings: stored verbatim from the constructor arguments.
        result: list of parsed offers (dicts).
        result_set: empty set; not used by this class itself
            (presumably used by subclasses for de-duplication - TODO confirm).
        average_price, count_parsed_offers: running statistics shown in the
            progress line; both start at 0.
        start_page, end_page: page range, taken from ``additional_settings``
            when present, otherwise 1 and 100.
        file_path: CSV destination built by ``build_file_path``.
    """

    def __init__(self,
                 session,
                 deal_type: str, rent_period_type, location_name: str,
                 with_saving_csv=False, with_extra_data=False,
                 object_type=None, additional_settings=None):
        """Store the search parameters and initialise the parsing state.

        Args:
            session: object stored as ``self.session``; not used by this class
                directly.
            deal_type: ``"sale"`` or ``"rent"`` (see ``is_sale`` / ``is_rent_*``).
            rent_period_type: ``4`` marks long-term rent, ``2`` short-term rent.
            location_name: location label; it is transliterated into the CSV
                file name.
            with_saving_csv: stored flag, not read by this class.
            with_extra_data: stored flag, not read by this class.
            object_type: stored value, not read by this class.
            additional_settings: optional mapping; the keys ``"start_page"``
                and ``"end_page"`` override the default page range.
        """
        self.accommodation_type = "suburban"
        self.session = session
        self.deal_type = deal_type
        self.rent_period_type = rent_period_type
        self.location_name = location_name
        self.with_saving_csv = with_saving_csv
        self.with_extra_data = with_extra_data
        self.additional_settings = additional_settings
        self.object_type = object_type

        self.result = []
        self.result_set = set()
        self.average_price = 0
        self.count_parsed_offers = 0

        # A missing settings mapping behaves like an empty one.
        settings = {} if additional_settings is None else additional_settings
        self.start_page = settings.get("start_page", 1)
        self.end_page = settings.get("end_page", 100)

        # Must come last: the file name embeds the page range set above.
        self.file_path = self.build_file_path()

    def is_sale(self):
        """Return True when the deal type is ``"sale"``."""
        return self.deal_type == "sale"

    def is_rent_long(self):
        """Return True for a rent deal whose period type is 4 (long-term)."""
        return self.deal_type == "rent" and self.rent_period_type == 4

    def is_rent_short(self):
        """Return True for a rent deal whose period type is 2 (short-term)."""
        return self.deal_type == "rent" and self.rent_period_type == 2

    def build_file_path(self):
        """Build the CSV output path inside the current working directory.

        ``FILE_NAME_BASE`` is formatted with the accommodation type, deal
        type, page range, the lower-cased and transliterated location name
        and a timestamp. Apostrophes are stripped from the resulting name.

        Returns:
            pathlib.Path: path of the CSV file under ``Path.cwd()``.
        """
        now_time = datetime.now().strftime("%d_%b_%Y_%H_%M_%S_%f")
        location = transliterate.translit(self.location_name.lower(), reversed=True)
        file_name = FILE_NAME_BASE.format(
            self.accommodation_type,
            self.deal_type,
            self.start_page,
            self.end_page,
            location,
            now_time,
        )
        return pathlib.Path(pathlib.Path.cwd(), file_name.replace("'", ""))

    def print_parse_progress(self, page_number, count_of_pages, offers, ind):
        """Print a single-line, carriage-return-refreshed progress indicator.

        Args:
            page_number: number of the list page currently being parsed.
            count_of_pages: how many pages are planned in total.
            offers: offers found on the current page (only its length is used).
            ind: zero-based index of the offer just processed on this page.

        Both percentages fall back to 0 when their denominator is zero (no
        offers on the page or no planned pages) instead of raising
        ``ZeroDivisionError``.
        """
        offers_on_page = len(offers)
        total_planned_offers = offers_on_page * count_of_pages

        page_percent = math.ceil((ind + 1) * 100 / offers_on_page) if offers_on_page else 0
        total_percent = (
            math.ceil(self.count_parsed_offers * 100 / total_planned_offers)
            if total_planned_offers
            else 0
        )

        # Filled part for processed offers, padding for the remaining ones.
        progress_bar = "=>" * (ind + 1) + " " * (offers_on_page - ind - 1)
        # Thousands separated by spaces, e.g. 1 234 567.
        average_price = "{:,}".format(int(self.average_price)).replace(",", " ")

        message = (
            f"\r {page_number - self.start_page + 1} "
            f" | {page_number} page with list: [{progress_bar}] {page_percent} %"
            f" | Count of all parsed: {self.count_parsed_offers} ."
            f" Progress ratio: {total_percent} %."
            f" Average price: {average_price} rub"
        )
        print(message, end="\r ", flush=True)

    def remove_unnecessary_fields(self):
        """Drop fields that belong to other deal types from every parsed offer.

        A sale keeps no rent-specific fields, a long-term rent keeps no
        short-term-rent or sale fields, and a short-term rent keeps no
        long-term-rent or sale fields. Every offer in ``self.result`` is
        cleaned (not only the most recent one), and an empty result list is
        left untouched.

        Returns:
            list: ``self.result`` after cleaning (mutated in place).
        """
        unwanted_fields = set()
        if self.is_sale():
            unwanted_fields.update(SPECIFIC_FIELDS_FOR_RENT_LONG, SPECIFIC_FIELDS_FOR_RENT_SHORT)
        if self.is_rent_long():
            unwanted_fields.update(SPECIFIC_FIELDS_FOR_RENT_SHORT, SPECIFIC_FIELDS_FOR_SALE)
        if self.is_rent_short():
            unwanted_fields.update(SPECIFIC_FIELDS_FOR_RENT_LONG, SPECIFIC_FIELDS_FOR_SALE)

        for offer in self.result:
            for field in unwanted_fields:
                offer.pop(field, None)

        return self.result

    def save_results(self):
        """Write all parsed offers to ``self.file_path`` as a ``;``-separated CSV.

        Nothing is written when there are no offers. The header is the
        ordered union of the keys of all offers, so a row carrying a key the
        first row lacks no longer makes ``csv.DictWriter`` raise; cells
        missing from a row are left empty.
        """
        if not self.result:
            return

        self.remove_unnecessary_fields()
        # dict.fromkeys keeps first-seen order while removing duplicates.
        fieldnames = list(dict.fromkeys(key for offer in self.result for key in offer))

        with open(self.file_path, 'w', newline='', encoding='utf-8') as output_file:
            dict_writer = csv.DictWriter(output_file, fieldnames, delimiter=';')
            dict_writer.writeheader()
            dict_writer.writerows(self.result)
0 commit comments