@@ -32,16 +32,19 @@ def callee(session):
32
32
`date_window` Date NOT NULL,
33
33
`build_type` Utf8 NOT NULL,
34
34
`branch` Utf8 NOT NULL,
35
+ `first_run` Timestamp,
36
+ `last_run` Timestamp ,
37
+ `owners` Utf8 ,
35
38
`days_ago_window` Uint64 NOT NULL,
36
39
`history` String,
37
40
`history_class` String,
38
41
`pass_count` Uint64,
39
42
`mute_count` Uint64,
40
43
`fail_count` Uint64,
41
44
`skip_count` Uint64,
42
- PRIMARY KEY (`test_name`, `suite_folder`, `full_name`,date_window)
45
+ PRIMARY KEY (`test_name`, `suite_folder`, `full_name`,date_window, build_type, branch )
43
46
)
44
- PARTITION BY HASH(`full_name`)
47
+ PARTITION BY HASH(`full_name`,build_type,branch )
45
48
WITH (STORE = COLUMN)
46
49
""" )
47
50
@@ -56,6 +59,8 @@ def bulk_upsert(table_client, table_path, rows):
56
59
.add_column ("suite_folder" , ydb .OptionalType (ydb .PrimitiveType .Utf8 ))
57
60
.add_column ("build_type" , ydb .OptionalType (ydb .PrimitiveType .Utf8 ))
58
61
.add_column ("branch" , ydb .OptionalType (ydb .PrimitiveType .Utf8 ))
62
+ .add_column ("first_run" , ydb .OptionalType (ydb .PrimitiveType .Timestamp ))
63
+ .add_column ("last_run" , ydb .OptionalType (ydb .PrimitiveType .Timestamp ))
59
64
.add_column ("full_name" , ydb .OptionalType (ydb .PrimitiveType .Utf8 ))
60
65
.add_column ("date_window" , ydb .OptionalType (ydb .PrimitiveType .Date ))
61
66
.add_column ("days_ago_window" , ydb .OptionalType (ydb .PrimitiveType .Uint64 ))
@@ -71,7 +76,8 @@ def bulk_upsert(table_client, table_path, rows):
71
76
72
77
def main ():
73
78
parser = argparse .ArgumentParser ()
74
- parser .add_argument ('--days-window' , default = 5 , type = int , help = 'how many days back we collecting history' )
79
+
80
+ parser .add_argument ('--days-window' , default = 1 , type = int , help = 'how many days back we collecting history' )
75
81
parser .add_argument ('--build_type' ,choices = ['relwithdebinfo' , 'release-asan' ], default = 'relwithdebinfo' , type = str , help = 'build : relwithdebinfo or release-asan' )
76
82
parser .add_argument ('--branch' , default = 'main' ,choices = ['main' ], type = str , help = 'branch' )
77
83
@@ -108,14 +114,15 @@ def main():
108
114
tc_settings = ydb .TableClientSettings ().with_native_date_in_result_sets (enabled = True )
109
115
table_client = ydb .TableClient (driver , tc_settings )
110
116
111
- table_path = f'test_results/analytics/flaky_tests_history_ { history_for_n_day } _days'
112
- default_start_date = datetime .date (2024 , 7 , 1 )
117
+ table_path = f'test_results/analytics/flaky_tests_window_ { history_for_n_day } _days'
118
+ default_start_date = datetime .date (2024 , 9 , 1 )
113
119
114
120
with ydb .SessionPool (driver ) as pool :
115
121
create_tables (pool , table_path )
116
122
117
123
# getting last date from history
118
- last_date_query = f"select max(date_window) as max_date_window from `{ table_path } `"
124
+ last_date_query = f"""select max(date_window) as max_date_window from `{ table_path } `
125
+ where build_type = '{ build_type } ' and branch = '{ branch } '"""
119
126
query = ydb .ScanQuery (last_date_query , {})
120
127
it = table_client .scan_query (query )
121
128
results = []
@@ -126,119 +133,129 @@ def main():
126
133
except StopIteration :
127
134
break
128
135
129
- if results [0 ] and results [0 ].get ( 'max_date_window' , default_start_date ) is not None :
130
- last_date = results [0 ].get (
131
- 'max_date_window' , default_start_date ).strftime ('%Y-%m-%d' )
136
+ if results [0 ] and results [0 ].get ( 'max_date_window' , default_start_date ) is not None and results [0 ].get ( 'max_date_window' , default_start_date ) > default_start_date :
137
+ last_datetime = results [0 ].get (
138
+ 'max_date_window' , default_start_date )
139
+
132
140
else :
133
- last_date = default_start_date .strftime ('%Y-%m-%d' )
141
+ last_datetime = default_start_date
142
+
143
+ last_date = last_datetime .strftime ('%Y-%m-%d' )
134
144
135
145
print (f'last hisotry date: { last_date } ' )
136
146
# getting history for dates >= last_date
137
- query_get_history = f"""
138
- select
139
- full_name,
140
- date_base,
141
- history_list,
142
- dist_hist,
143
- suite_folder,
144
- test_name,
145
- '{ build_type } ' as build_type,
146
- '{ branch } ' as branch
147
- from (
147
+
148
+ today = datetime .date .today ()
149
+ date_list = [today - datetime .timedelta (days = x ) for x in range ((today - last_datetime ).days + 1 )]
150
+ for date in sorted (date_list ):
151
+ query_get_history = f"""
152
+
148
153
select
149
154
full_name,
150
155
date_base,
151
- AGG_LIST(status) as history_list ,
152
- String::JoinFromList( AGG_LIST_DISTINCT(status) ,',' ) as dist_hist,
156
+ history_list,
157
+ if(dist_hist = '','no_runs',dist_hist ) as dist_hist,
153
158
suite_folder,
154
- test_name
159
+ test_name,
160
+ build_type,
161
+ branch,
162
+ owners,
163
+ first_run,
164
+ last_run
165
+
155
166
from (
156
- select * from (
167
+ select
168
+ full_name,
169
+ date_base,
170
+ AGG_LIST(status) as history_list ,
171
+ String::JoinFromList( ListSort(AGG_LIST_DISTINCT(status)) ,',') as dist_hist,
172
+ suite_folder,
173
+ test_name,
174
+ owners,
175
+ build_type,
176
+ branch,
177
+ min(run_timestamp) as first_run,
178
+ max(run_timestamp) as last_run
179
+ from (
157
180
select * from (
158
- select DISTINCT
181
+
182
+ select distinct
159
183
full_name,
160
184
suite_folder,
161
- test_name
185
+ test_name,
186
+ owners,
187
+ Date('{ date } ') as date_base,
188
+ '{ build_type } ' as build_type,
189
+ '{ branch } ' as branch
162
190
from `test_results/analytics/testowners`
163
- where run_timestamp_last >= Date('{ last_date } ') - 3*Interval("P1D")
164
- ) as all_tests
165
- cross join (
166
- select
167
- DISTINCT DateTime::MakeDate(run_timestamp) as date_base
168
- from `test_results/test_runs_column`
169
- where
170
- (job_name ='Nightly-run' or job_name ='Postcommit_relwithdebinfo' or job_name ='Postcommit_asan')
171
- and run_timestamp>= Date('{ last_date } ')
172
- ) as date_list
173
191
) as test_and_date
174
- left JOIN (
175
- select * from (
192
+ left JOIN (
193
+
176
194
select
177
195
suite_folder || '/' || test_name as full_name,
178
196
run_timestamp,
179
197
status
180
198
from `test_results/test_runs_column`
181
199
where
182
- run_timestamp >= Date('{ last_date } ') -{ history_for_n_day } *Interval("P1D")
200
+ run_timestamp <= Date('{ date } ') + Interval("P1D")
201
+ and run_timestamp >= Date('{ date } ') - { history_for_n_day } *Interval("P1D")
202
+
183
203
and (job_name ='Nightly-run' or job_name ='Postcommit_relwithdebinfo' or job_name ='Postcommit_asan')
184
204
and build_type = '{ build_type } '
185
205
and branch = '{ branch } '
186
206
order by full_name,run_timestamp desc
187
- )
188
- ) as hist
189
- ON test_and_date.full_name=hist.full_name
190
- where
191
- hist.run_timestamp >= test_and_date.date_base -{ history_for_n_day } *Interval("P1D") AND
192
- hist.run_timestamp < test_and_date.date_base + Interval("P1D")
193
-
207
+
208
+ ) as hist
209
+ ON test_and_date.full_name=hist.full_name
210
+ )
211
+ GROUP BY full_name,suite_folder,test_name,date_base,build_type,branch,owners
194
212
)
195
- GROUP BY full_name,suite_folder,test_name,date_base
196
-
197
- )
198
- """
199
- query = ydb .ScanQuery (query_get_history , {})
200
- # start transaction time
201
- start_time = time .time ()
202
- it = driver .table_client .scan_query (query )
203
- # end transaction time
213
+ """
214
+ query = ydb .ScanQuery (query_get_history , {})
215
+ # start transaction time
216
+ start_time = time .time ()
217
+ it = driver .table_client .scan_query (query )
218
+ # end transaction time
204
219
205
- results = []
206
- prepared_for_update_rows = []
207
- while True :
208
- try :
209
- result = next (it )
210
- results = results + result .result_set .rows
211
- except StopIteration :
212
- break
213
- end_time = time .time ()
214
- print (f'transaction duration: { end_time - start_time } ' )
215
-
216
- print (f'history data captured, { len (results )} rows' )
217
- for row in results :
218
- row ['count' ] = dict (zip (list (row ['history_list' ]), [list (
219
- row ['history_list' ]).count (i ) for i in list (row ['history_list' ])]))
220
- prepared_for_update_rows .append ({
221
- 'suite_folder' : row ['suite_folder' ],
222
- 'test_name' : row ['test_name' ],
223
- 'full_name' : row ['full_name' ],
224
- 'date_window' : row ['date_base' ],
225
- 'days_ago_window' : history_for_n_day ,
226
- 'build_type' : row ['build_type' ],
227
- 'branch' : row ['branch' ],
228
- 'history' : ',' .join (row ['history_list' ]).encode ('utf8' ),
229
- 'history_class' : row ['dist_hist' ],
230
- 'pass_count' : row ['count' ].get ('passed' , 0 ),
231
- 'mute_count' : row ['count' ].get ('mute' , 0 ),
232
- 'fail_count' : row ['count' ].get ('failure' , 0 ),
233
- 'skip_count' : row ['count' ].get ('skipped' , 0 ),
234
- })
235
- print ('upserting history' )
236
- with ydb .SessionPool (driver ) as pool :
220
+ results = []
221
+ prepared_for_update_rows = []
222
+ while True :
223
+ try :
224
+ result = next (it )
225
+ results = results + result .result_set .rows
226
+ except StopIteration :
227
+ break
228
+ end_time = time .time ()
229
+ print (f'transaction duration: { end_time - start_time } ' )
237
230
238
- create_tables (pool , table_path )
239
- full_path = posixpath .join (DATABASE_PATH , table_path )
240
- bulk_upsert (driver .table_client , full_path ,
241
- prepared_for_update_rows )
231
+ print (f'history data captured, { len (results )} rows' )
232
+ for row in results :
233
+ row ['count' ] = dict (zip (list (row ['history_list' ]), [list (
234
+ row ['history_list' ]).count (i ) for i in list (row ['history_list' ])]))
235
+ prepared_for_update_rows .append ({
236
+ 'suite_folder' : row ['suite_folder' ],
237
+ 'test_name' : row ['test_name' ],
238
+ 'full_name' : row ['full_name' ],
239
+ 'date_window' : row ['date_base' ],
240
+ 'days_ago_window' : history_for_n_day ,
241
+ 'build_type' : row ['build_type' ],
242
+ 'branch' : row ['branch' ],
243
+ 'first_run' : row ['first_run' ],
244
+ 'last_run' : row ['last_run' ],
245
+ 'history' : ',' .join (row ['history_list' ]).encode ('utf8' ),
246
+ 'history_class' : row ['dist_hist' ],
247
+ 'pass_count' : row ['count' ].get ('passed' , 0 ),
248
+ 'mute_count' : row ['count' ].get ('mute' , 0 ),
249
+ 'fail_count' : row ['count' ].get ('failure' , 0 ),
250
+ 'skip_count' : row ['count' ].get ('skipped' , 0 ),
251
+ })
252
+ print (f'upserting history for date { date } ' )
253
+ with ydb .SessionPool (driver ) as pool :
254
+
255
+ create_tables (pool , table_path )
256
+ full_path = posixpath .join (DATABASE_PATH , table_path )
257
+ bulk_upsert (driver .table_client , full_path ,
258
+ prepared_for_update_rows )
242
259
243
260
print ('history updated' )
244
261
0 commit comments