Skip to content

Commit 0ecb283

Browse files
committed
Avoid too many lines in utils.py
1 parent be57f8f commit 0ecb283

File tree

4 files changed

+106
-76
lines changed

4 files changed

+106
-76
lines changed

tools/accuracy_checker/accuracy_checker/annotation_converters/convert.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@
1616
import warnings
1717
import platform
1818
import sys
19-
19+
import os
2020
import copy
2121
import json
22+
import tempfile
2223
from pathlib import Path
2324
import pickle # nosec B403 # disable import-pickle check
2425
from argparse import ArgumentParser
@@ -34,8 +35,7 @@
3435
)
3536
from ..data_readers import KaldiFrameIdentifier, KaldiMatrixIdentifier
3637
from ..utils import (
37-
get_path, OrderedSet, cast_to_bool, is_relative_to, start_telemetry, send_telemetry_event,
38-
end_telemetry, AtomicWriteFileHandle
38+
get_path, OrderedSet, cast_to_bool, is_relative_to, start_telemetry, send_telemetry_event, end_telemetry
3939
)
4040
from ..data_analyzer import BaseDataAnalyzer
4141
from .format_converter import BaseFormatConverter
@@ -410,3 +410,38 @@ def analyze_dataset(annotations, metadata):
410410
else:
411411
metadata = {'data_analysis': data_analysis}
412412
return metadata
413+
414+
class AtomicWriteFileHandle:
    """Write a file through a temporary sibling and publish it at most once.

    Data is written to a temporary file created in the same directory as
    ``file_path``. When the handle is closed, the temporary file is renamed to
    ``file_path`` if nothing exists there yet; otherwise the temporary file is
    deleted and the existing target is kept. Readers that find the target can
    therefore rely on it being a completely written file.

    Attributes not defined here (``read``, ``flush``, ``name``, ``closed``, ...)
    are delegated to the underlying temporary file object.

    Args:
        file_path: final destination of the file.
        open_mode: mode string passed to ``os.fdopen`` for the temporary file
            (for example ``'wt'`` or ``'wb'``).
    """

    def __init__(self, file_path, open_mode):
        self.target_path = file_path
        self.mode = open_mode
        # Set once the temporary file has been published or discarded, so that
        # repeated close() calls become no-ops.
        self._finalized = False

        # The temporary file lives next to the target so the final rename
        # stays within one directory.
        self.temp_fd, self.temp_path = tempfile.mkstemp(dir=os.path.dirname(file_path))
        try:
            self.temp_file = os.fdopen(self.temp_fd, open_mode)
        except Exception:
            # Do not leak the descriptor or leave a stray temporary file behind
            # when the mode is rejected.
            try:
                os.close(self.temp_fd)
            except OSError:
                pass  # descriptor was already closed by the failed open
            self._discard()
            raise

    def write(self, data):
        """Write ``data`` to the temporary file."""
        self.temp_file.write(data)

    def writelines(self, lines):
        """Write every item of ``lines`` to the temporary file."""
        self.temp_file.writelines(lines)

    def close(self):
        """Close the temporary file and publish it unless the target exists.

        Calling ``close()`` more than once is safe: only the first call
        publishes or discards the temporary file.
        """
        if not self.temp_file.closed:
            self.temp_file.close()
        if self._finalized:
            return
        self._finalized = True
        self._publish()

    def _publish(self):
        """Rename the temporary file to the target, or drop it if the target exists."""
        if os.path.exists(self.target_path):
            self._discard()
            return
        try:
            os.rename(self.temp_path, self.target_path)
        except FileExistsError:
            # Another writer created the target between the check above and the
            # rename (os.rename refuses to overwrite on Windows): keep theirs.
            self._discard()

    def _discard(self):
        """Delete the temporary file, tolerating that it is already gone."""
        try:
            os.remove(self.temp_path)
        except FileNotFoundError:
            pass

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Publish on a clean exit; discard the temporary file if the body raised.

        The exception, if any, is not suppressed.
        """
        if exc_type is None:
            self.close()
            return
        # The body failed part-way through: never expose the incomplete data
        # under the target name.
        if not self.temp_file.closed:
            self.temp_file.close()
        if not self._finalized:
            self._finalized = True
            self._discard()

    # Mimic other file object methods as needed
    def __getattr__(self, item):
        """Delegate attribute access to the underlying temporary file object."""
        # Look temp_file up in __dict__ directly: going through self.temp_file
        # would re-enter __getattr__ forever when it has not been set yet.
        try:
            temp_file = self.__dict__['temp_file']
        except KeyError:
            raise AttributeError(item) from None
        return getattr(temp_file, item)

tools/accuracy_checker/accuracy_checker/utils.py

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import sys
2525
import zlib
2626
import re
27-
import tempfile
2827
from enum import Enum
2928

3029
from pathlib import Path
@@ -982,38 +981,3 @@ def ov_new_api_available():
982981
return True
983982
except ImportError:
984983
return False
985-
986-
987-
class AtomicWriteFileHandle:
988-
"""Ensure the file is written once in case of multi processes or threads."""
989-
990-
def __init__(self, file_path, open_mode):
991-
self.target_path = file_path
992-
self.mode = open_mode
993-
self.temp_fd, self.temp_path = tempfile.mkstemp(dir=os.path.dirname(file_path))
994-
self.temp_file = os.fdopen(self.temp_fd, open_mode)
995-
996-
def write(self, data):
997-
self.temp_file.write(data)
998-
999-
def writelines(self, lines):
1000-
self.temp_file.writelines(lines)
1001-
1002-
def close(self):
1003-
if not self.temp_file.closed:
1004-
self.temp_file.close()
1005-
if not os.path.exists(self.target_path):
1006-
os.rename(self.temp_path, self.target_path)
1007-
else:
1008-
os.remove(self.temp_path)
1009-
1010-
def __enter__(self):
1011-
return self
1012-
1013-
def __exit__(self, exc_type, exc_val, exc_tb):
1014-
self.close()
1015-
1016-
# Mimic other file object methods as needed
1017-
def __getattr__(self, item):
1018-
"""Delegate attribute access to the underlying temporary file object."""
1019-
return getattr(self.temp_file, item)
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""
2+
Copyright (c) 2018-2024 Intel Corporation
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
"""
16+
17+
import os
18+
import threading
19+
import warnings
20+
from accuracy_checker.annotation_converters.convert import AtomicWriteFileHandle
21+
22+
def thread_access_file(file_path, data_dict, thread_id, write_lines):
    """Worker body: create ``file_path`` if absent, otherwise verify it is complete.

    When the file does not exist yet, ``write_lines`` lines tagged with
    ``thread_id`` and ``data_dict[thread_id]`` are written through
    AtomicWriteFileHandle. When it already exists, its line count is compared
    with ``write_lines``; a mismatch emits a warning and stores the message
    under ``data_dict['assert']`` so the spawning test can report it.
    """
    if not os.path.exists(file_path):
        with AtomicWriteFileHandle(file_path, 'wt') as file:
            for i in range(write_lines):
                file.write(f"Thread {thread_id}:Line{i} {data_dict[thread_id]}\n")
        return

    with open(file_path, 'r') as file:
        read_lines = len(file.readlines())

    # when new thread sees file it must have all lines already written
    if read_lines == write_lines:
        return

    warn_message = f"Thread {thread_id}: Incorrect number of lines read from {file_path} ({read_lines} != {write_lines})"
    warnings.warn(warn_message)
    data_dict['assert'] = warn_message
35+
36+
class TestAtomicWriteFileHandle:
    """Concurrent-access test for AtomicWriteFileHandle."""

    def test_multithreaded_atomic_file_write(self):
        """Start several threads on one path and check the published file.

        Every thread either writes the file or, if it already exists, checks
        that it holds all ``write_lines`` lines. Afterwards the file must
        contain exactly ``write_lines`` lines carrying a known data chunk.
        """
        target_file_path = "test_atomic_file.txt"
        num_threads = 10
        write_lines = 10
        data_chunks = [f"Data chunk {i}" for i in range(num_threads)]
        # Shared between the workers; a failing reader adds an 'assert' entry.
        threads_dict = dict(enumerate(data_chunks))

        # Start from a clean slate in case an earlier run left the file behind.
        if os.path.exists(target_file_path):
            os.remove(target_file_path)

        threads = [
            threading.Thread(
                target=thread_access_file,
                args=(target_file_path, threads_dict, worker_id, write_lines),
            )
            for worker_id in range(num_threads)
        ]

        for worker in threads:
            worker.start()

        for worker in threads:
            worker.join()

        with open(target_file_path, 'r') as file:
            lines = file.readlines()

        os.remove(target_file_path)

        # check asserts passed from threads
        assert 'assert' not in threads_dict, threads_dict['assert']

        matching_lines = sum(
            1
            for line in lines
            for data_chunk in data_chunks
            if data_chunk in line
        )
        assert matching_lines == write_lines, f"data_chunks data not found in the {target_file_path} file"

tools/accuracy_checker/tests/test_utils.py

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,7 @@
1414
limitations under the License.
1515
"""
1616

17-
import os
18-
import threading
19-
from accuracy_checker.utils import concat_lists, contains_all, contains_any, overrides, zipped_transform, AtomicWriteFileHandle
17+
from accuracy_checker.utils import concat_lists, contains_all, contains_any, overrides, zipped_transform
2018

2119

2220
def test_concat_lists():
@@ -127,37 +125,3 @@ class C:
127125

128126
assert overrides(B, 'foo', A)
129127
assert not overrides(C, 'foo', A)
130-
131-
132-
def thread_write_to_file(file_path, data, thread_id):
133-
with AtomicWriteFileHandle(file_path, 'wt') as file:
134-
file.write(f"Thread {thread_id}: {data}\n")
135-
136-
137-
class TestAtomicWriteFileHandle:
138-
139-
def test_multithreaded_atomic_file_write(self):
140-
target_file_path = "test_atomic_file.txt"
141-
threads = []
142-
num_threads = 8
143-
data_chunks = [f"Data chunk {i}" for i in range(num_threads)]
144-
145-
if os.path.exists(target_file_path):
146-
os.remove(target_file_path)
147-
148-
for i in range(num_threads):
149-
thread = threading.Thread(target=thread_write_to_file, args=(target_file_path, data_chunks[i], i))
150-
threads.append(thread)
151-
152-
for thread in threads:
153-
thread.start()
154-
155-
for thread in threads:
156-
thread.join()
157-
158-
with open(target_file_path, 'r') as file:
159-
lines = file.readlines()
160-
161-
os.remove(target_file_path)
162-
163-
assert any(data_chunk in line for line in lines for data_chunk in data_chunks), f"data_chunks data not found in the file"

0 commit comments

Comments
 (0)