Skip to content

Commit 9e8c4c6

Browse files
authored
Fix bulk upsert rewrite results asan/relwithdebinfo in analytics (#9521)
1 parent 60dc4df commit 9e8c4c6

File tree

3 files changed

+110
-96
lines changed

3 files changed

+110
-96
lines changed

.github/scripts/analytics/flaky_tests_history.py

Lines changed: 109 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,19 @@ def callee(session):
3232
`date_window` Date NOT NULL,
3333
`build_type` Utf8 NOT NULL,
3434
`branch` Utf8 NOT NULL,
35+
`first_run` Timestamp,
36+
`last_run` Timestamp ,
37+
`owners` Utf8 ,
3538
`days_ago_window` Uint64 NOT NULL,
3639
`history` String,
3740
`history_class` String,
3841
`pass_count` Uint64,
3942
`mute_count` Uint64,
4043
`fail_count` Uint64,
4144
`skip_count` Uint64,
42-
PRIMARY KEY (`test_name`, `suite_folder`, `full_name`,date_window)
45+
PRIMARY KEY (`test_name`, `suite_folder`, `full_name`,date_window, build_type, branch)
4346
)
44-
PARTITION BY HASH(`full_name`)
47+
PARTITION BY HASH(`full_name`,build_type,branch)
4548
WITH (STORE = COLUMN)
4649
""")
4750

@@ -56,6 +59,8 @@ def bulk_upsert(table_client, table_path, rows):
5659
.add_column("suite_folder", ydb.OptionalType(ydb.PrimitiveType.Utf8))
5760
.add_column("build_type", ydb.OptionalType(ydb.PrimitiveType.Utf8))
5861
.add_column("branch", ydb.OptionalType(ydb.PrimitiveType.Utf8))
62+
.add_column("first_run", ydb.OptionalType(ydb.PrimitiveType.Timestamp))
63+
.add_column("last_run", ydb.OptionalType(ydb.PrimitiveType.Timestamp))
5964
.add_column("full_name", ydb.OptionalType(ydb.PrimitiveType.Utf8))
6065
.add_column("date_window", ydb.OptionalType(ydb.PrimitiveType.Date))
6166
.add_column("days_ago_window", ydb.OptionalType(ydb.PrimitiveType.Uint64))
@@ -71,7 +76,8 @@ def bulk_upsert(table_client, table_path, rows):
7176

7277
def main():
7378
parser = argparse.ArgumentParser()
74-
parser.add_argument('--days-window', default=5, type=int, help='how many days back we collecting history')
79+
80+
parser.add_argument('--days-window', default=1, type=int, help='how many days back we collecting history')
7581
parser.add_argument('--build_type',choices=['relwithdebinfo', 'release-asan'], default='relwithdebinfo', type=str, help='build : relwithdebinfo or release-asan')
7682
parser.add_argument('--branch', default='main',choices=['main'], type=str, help='branch')
7783

@@ -108,14 +114,15 @@ def main():
108114
tc_settings = ydb.TableClientSettings().with_native_date_in_result_sets(enabled=True)
109115
table_client = ydb.TableClient(driver, tc_settings)
110116

111-
table_path = f'test_results/analytics/flaky_tests_history_{history_for_n_day}_days'
112-
default_start_date = datetime.date(2024, 7, 1)
117+
table_path = f'test_results/analytics/flaky_tests_window_{history_for_n_day}_days'
118+
default_start_date = datetime.date(2024, 9, 1)
113119

114120
with ydb.SessionPool(driver) as pool:
115121
create_tables(pool, table_path)
116122

117123
# getting last date from history
118-
last_date_query = f"select max(date_window) as max_date_window from `{table_path}`"
124+
last_date_query = f"""select max(date_window) as max_date_window from `{table_path}`
125+
where build_type = '{build_type}' and branch = '{branch}'"""
119126
query = ydb.ScanQuery(last_date_query, {})
120127
it = table_client.scan_query(query)
121128
results = []
@@ -126,119 +133,129 @@ def main():
126133
except StopIteration:
127134
break
128135

129-
if results[0] and results[0].get( 'max_date_window', default_start_date) is not None:
130-
last_date = results[0].get(
131-
'max_date_window', default_start_date).strftime('%Y-%m-%d')
136+
if results[0] and results[0].get( 'max_date_window', default_start_date) is not None and results[0].get( 'max_date_window', default_start_date) > default_start_date:
137+
last_datetime = results[0].get(
138+
'max_date_window', default_start_date)
139+
132140
else:
133-
last_date = default_start_date.strftime('%Y-%m-%d')
141+
last_datetime = default_start_date
142+
143+
last_date = last_datetime.strftime('%Y-%m-%d')
134144

135145
print(f'last hisotry date: {last_date}')
136146
# getting history for dates >= last_date
137-
query_get_history = f"""
138-
select
139-
full_name,
140-
date_base,
141-
history_list,
142-
dist_hist,
143-
suite_folder,
144-
test_name,
145-
'{build_type}' as build_type,
146-
'{branch}' as branch
147-
from (
147+
148+
today = datetime.date.today()
149+
date_list = [today - datetime.timedelta(days=x) for x in range((today - last_datetime).days+1)]
150+
for date in sorted(date_list):
151+
query_get_history = f"""
152+
148153
select
149154
full_name,
150155
date_base,
151-
AGG_LIST(status) as history_list ,
152-
String::JoinFromList( AGG_LIST_DISTINCT(status) ,',') as dist_hist,
156+
history_list,
157+
if(dist_hist = '','no_runs',dist_hist) as dist_hist,
153158
suite_folder,
154-
test_name
159+
test_name,
160+
build_type,
161+
branch,
162+
owners,
163+
first_run,
164+
last_run
165+
155166
from (
156-
select * from (
167+
select
168+
full_name,
169+
date_base,
170+
AGG_LIST(status) as history_list ,
171+
String::JoinFromList( ListSort(AGG_LIST_DISTINCT(status)) ,',') as dist_hist,
172+
suite_folder,
173+
test_name,
174+
owners,
175+
build_type,
176+
branch,
177+
min(run_timestamp) as first_run,
178+
max(run_timestamp) as last_run
179+
from (
157180
select * from (
158-
select DISTINCT
181+
182+
select distinct
159183
full_name,
160184
suite_folder,
161-
test_name
185+
test_name,
186+
owners,
187+
Date('{date}') as date_base,
188+
'{build_type}' as build_type,
189+
'{branch}' as branch
162190
from `test_results/analytics/testowners`
163-
where run_timestamp_last >= Date('{last_date}') - 3*Interval("P1D")
164-
) as all_tests
165-
cross join (
166-
select
167-
DISTINCT DateTime::MakeDate(run_timestamp) as date_base
168-
from `test_results/test_runs_column`
169-
where
170-
(job_name ='Nightly-run' or job_name ='Postcommit_relwithdebinfo' or job_name ='Postcommit_asan')
171-
and run_timestamp>= Date('{last_date}')
172-
) as date_list
173191
) as test_and_date
174-
left JOIN (
175-
select * from (
192+
left JOIN (
193+
176194
select
177195
suite_folder || '/' || test_name as full_name,
178196
run_timestamp,
179197
status
180198
from `test_results/test_runs_column`
181199
where
182-
run_timestamp >= Date('{last_date}') -{history_for_n_day}*Interval("P1D")
200+
run_timestamp <= Date('{date}') + Interval("P1D")
201+
and run_timestamp >= Date('{date}') - {history_for_n_day}*Interval("P1D")
202+
183203
and (job_name ='Nightly-run' or job_name ='Postcommit_relwithdebinfo' or job_name ='Postcommit_asan')
184204
and build_type = '{build_type}'
185205
and branch = '{branch}'
186206
order by full_name,run_timestamp desc
187-
)
188-
) as hist
189-
ON test_and_date.full_name=hist.full_name
190-
where
191-
hist.run_timestamp >= test_and_date.date_base -{history_for_n_day}*Interval("P1D") AND
192-
hist.run_timestamp < test_and_date.date_base + Interval("P1D")
193-
207+
208+
) as hist
209+
ON test_and_date.full_name=hist.full_name
210+
)
211+
GROUP BY full_name,suite_folder,test_name,date_base,build_type,branch,owners
194212
)
195-
GROUP BY full_name,suite_folder,test_name,date_base
196-
197-
)
198-
"""
199-
query = ydb.ScanQuery(query_get_history, {})
200-
# start transaction time
201-
start_time = time.time()
202-
it = driver.table_client.scan_query(query)
203-
# end transaction time
213+
"""
214+
query = ydb.ScanQuery(query_get_history, {})
215+
# start transaction time
216+
start_time = time.time()
217+
it = driver.table_client.scan_query(query)
218+
# end transaction time
204219

205-
results = []
206-
prepared_for_update_rows = []
207-
while True:
208-
try:
209-
result = next(it)
210-
results = results + result.result_set.rows
211-
except StopIteration:
212-
break
213-
end_time = time.time()
214-
print(f'transaction duration: {end_time - start_time}')
215-
216-
print(f'history data captured, {len(results)} rows')
217-
for row in results:
218-
row['count'] = dict(zip(list(row['history_list']), [list(
219-
row['history_list']).count(i) for i in list(row['history_list'])]))
220-
prepared_for_update_rows.append({
221-
'suite_folder': row['suite_folder'],
222-
'test_name': row['test_name'],
223-
'full_name': row['full_name'],
224-
'date_window': row['date_base'],
225-
'days_ago_window': history_for_n_day,
226-
'build_type': row['build_type'],
227-
'branch': row['branch'],
228-
'history': ','.join(row['history_list']).encode('utf8'),
229-
'history_class': row['dist_hist'],
230-
'pass_count': row['count'].get('passed', 0),
231-
'mute_count': row['count'].get('mute', 0),
232-
'fail_count': row['count'].get('failure', 0),
233-
'skip_count': row['count'].get('skipped', 0),
234-
})
235-
print('upserting history')
236-
with ydb.SessionPool(driver) as pool:
220+
results = []
221+
prepared_for_update_rows = []
222+
while True:
223+
try:
224+
result = next(it)
225+
results = results + result.result_set.rows
226+
except StopIteration:
227+
break
228+
end_time = time.time()
229+
print(f'transaction duration: {end_time - start_time}')
237230

238-
create_tables(pool, table_path)
239-
full_path = posixpath.join(DATABASE_PATH, table_path)
240-
bulk_upsert(driver.table_client, full_path,
241-
prepared_for_update_rows)
231+
print(f'history data captured, {len(results)} rows')
232+
for row in results:
233+
row['count'] = dict(zip(list(row['history_list']), [list(
234+
row['history_list']).count(i) for i in list(row['history_list'])]))
235+
prepared_for_update_rows.append({
236+
'suite_folder': row['suite_folder'],
237+
'test_name': row['test_name'],
238+
'full_name': row['full_name'],
239+
'date_window': row['date_base'],
240+
'days_ago_window': history_for_n_day,
241+
'build_type': row['build_type'],
242+
'branch': row['branch'],
243+
'first_run': row['first_run'],
244+
'last_run': row['last_run'],
245+
'history': ','.join(row['history_list']).encode('utf8'),
246+
'history_class': row['dist_hist'],
247+
'pass_count': row['count'].get('passed', 0),
248+
'mute_count': row['count'].get('mute', 0),
249+
'fail_count': row['count'].get('failure', 0),
250+
'skip_count': row['count'].get('skipped', 0),
251+
})
252+
print(f'upserting history for date {date}')
253+
with ydb.SessionPool(driver) as pool:
254+
255+
create_tables(pool, table_path)
256+
full_path = posixpath.join(DATABASE_PATH, table_path)
257+
bulk_upsert(driver.table_client, full_path,
258+
prepared_for_update_rows)
242259

243260
print('history updated')
244261

.github/scripts/analytics/flaky_tests_history_n_runs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ def main():
185185
'{branch}' as branch
186186
from `test_results/analytics/testowners` as t1
187187
where run_timestamp_last >= Date('{date}') - 3*Interval("P1D")
188+
and run_timestamp_last <= Date('{date}') + Interval("P1D")
188189
) as test_and_date
189190
left JOIN (
190191
select * from (

.github/workflows/collect_analytics.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,6 @@ jobs:
2929
python3 -m pip install ydb ydb[yc] codeowners
3030
- name: Collect testowners
3131
run: python3 .github/scripts/analytics/upload_testowners.py
32-
- name: Collect test history data with window 5 days relwithdebinfo for main
33-
run: python3 .github/scripts/analytics/flaky_tests_history.py --days-window=5
34-
- name: Collect test history data with window 5 days release-asan for main
35-
run: python3 .github/scripts/analytics/flaky_tests_history.py --days-window=5 --build_type=release-asan
3632
- name: Collect test history data with window 1 days relwithdebinfo for main
3733
run: python3 .github/scripts/analytics/flaky_tests_history.py --days-window=1
3834
- name: Collect test history data with window 1 days release-asan for main

0 commit comments

Comments
 (0)