Skip to content

Add more test cases for cautious-robot #20

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
4f7b4f6
Add more test cases for cautious-robot
zoeduan Jul 24, 2024
7aeeb31
Update tests/test_buddycheck.py
zoeduan Jul 26, 2024
f76bca7
Update tests/test_download_images.py
zoeduan Jul 26, 2024
be63bdd
update the test_buddycheck.py: 1. add another buddycheck instance wit…
zoeduan Jul 26, 2024
cbb2696
Merge branch 'feature/enhance-testing' of github.com:Imageomics/cauti…
zoeduan Jul 26, 2024
ac351d0
Merge remote-tracking branch 'origin/main' into feature/enhance-testing
zoeduan Jul 26, 2024
84e3db2
update test_logging
zoeduan Jul 26, 2024
8ede4e4
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
b182c43
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
705c3a7
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
8b803ab
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
6e9a6f9
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
ced10d5
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
66e9328
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
e01a1e6
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
36f2a08
remove test_check_alignment_case_insensitive_columns
zoeduan Jul 27, 2024
c527cec
Replace the retry template with a loop over retry codes to handle bot…
zoeduan Jul 27, 2024
279ea43
Update test_downsampled_image_creation
zoeduan Jul 28, 2024
4947850
Update tests/test_buddycheck.py
zoeduan Jul 29, 2024
1c7b535
Update tests/test_buddycheck.py
zoeduan Jul 29, 2024
e92f93a
1. Update test_success_after_retries and test_failure_after_retries 2…
zoeduan Jul 29, 2024
2a31f5c
fix downsize read bug
egrace479 Jul 29, 2024
463d147
Update test_downsampled_image_creation(_with_subfolder)
zoeduan Jul 29, 2024
fd9f8b2
Updated test_download_images.py to make it run faster
zoeduan Jul 30, 2024
d752b8d
Update tests/test_download_images.py
zoeduan Jul 30, 2024
16bda3a
Update tests/test_download_images.py
zoeduan Jul 30, 2024
90237f1
"Add sample images from Rare-Species dataset (8511cb36-ea18-419a-b938…
zoeduan Jul 30, 2024
df58073
Custom error when validating empty input df
thompsonmj Jul 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions src/cautiousrobot/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,17 @@ def download_images(data, img_dir, log_filepath, error_log_filepath, filename =
if os.path.exists(downsample_dir_path) != True:
os.makedirs(downsample_dir_path, exist_ok=False)
# Downsample & save image
byte_data = io.BytesIO(response.content)
img = Image.open(byte_data)
#img.save(dest_path)
img.resize((downsample, downsample)).save(downsample_dir_path + "/" + image_name)
try:
img = Image.open(f"{image_dir_path}/{image_name}")
img.resize((downsample, downsample)).save(downsample_dir_path + "/" + image_name)
except Exception as e:
print(e)
log_errors = log_response(log_errors,
index = i,
image = "downsized_" + image_name,
url = url,
response_code = str(e))
update_log(log = log_errors, index = i, filepath = error_log_filepath)

# check for too many requests
elif response.status_code in REDO_CODE_LIST:
Expand Down
3 changes: 2 additions & 1 deletion tests/test_buddycheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ def test_merge_on_checksum(self):

merged_df = self.buddy_check_filename.merge_on_checksum(source_df, checksum_df, 'checksum')
expected_df = pd.DataFrame({
'filename': ['image1.jpg', 'image2.jpg', 'image3.jpg'],
'filename_x': ['image1.jpg', 'image2.jpg', 'image3.jpg'],
'checksum': ['abc123', 'def456', 'ghi789'],
'filename_y': ['image1.jpg', 'image2.jpg', 'image3.jpg'],
'md5': ['abc123', 'def456', 'ghi789']
})
pd.testing.assert_frame_equal(merged_df, expected_df)
Expand Down
95 changes: 73 additions & 22 deletions tests/test_download_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,50 @@
import requests
import base64
from cautiousrobot.__main__ import download_images, main
from http.server import HTTPServer, SimpleHTTPRequestHandler
import threading

TESTDATA_DIR = os.path.join(os.path.dirname(__file__), 'testdata')

class CustomHTTPRequestHandler(SimpleHTTPRequestHandler):
    """Request handler that serves files from ``TESTDATA_DIR``.

    Path resolution is delegated to ``SimpleHTTPRequestHandler`` by passing
    ``TESTDATA_DIR`` as its ``directory`` argument, so query strings and
    fragments are stripped, percent-escapes are decoded, and ``..`` segments
    are ignored instead of being joined verbatim onto the test-data directory.
    """

    def __init__(self, *args, **kwargs):
        # setdefault keeps an explicitly supplied ``directory`` working.
        kwargs.setdefault("directory", TESTDATA_DIR)
        super().__init__(*args, **kwargs)

class TestDownload(unittest.TestCase):

@classmethod
def setUpClass(cls):
    """Start a local HTTP server on localhost:9201 in a background thread.

    The server object is stored on ``cls.httpd`` and its thread on
    ``cls.server_thread`` so that ``tearDownClass`` can stop them.
    """
    cls.httpd = HTTPServer(('localhost', 9201), CustomHTTPRequestHandler)
    # Daemon thread: if the run is interrupted before tearDownClass is
    # reached, a still-running serve_forever() must not keep the
    # interpreter alive.
    cls.server_thread = threading.Thread(target=cls.httpd.serve_forever, daemon=True)
    cls.server_thread.start()
    print(f"Serving {TESTDATA_DIR} on http://localhost:9201")

@classmethod
def tearDownClass(cls):
    """Stop the test HTTP server, release its socket, and join its thread."""
    cls.httpd.shutdown()
    # shutdown() only ends the serve_forever() loop; server_close() is what
    # actually closes the listening socket so the port is freed.
    cls.httpd.server_close()
    cls.server_thread.join()


def setUp(self):
self.DUMMY_DATA = pd.DataFrame(data={
"filename": ["test_file1", "test_file2"],
"file_url": ["http://test_url1.com/image.jpg", "http://test_url2.com/image.jpg"],
"filename": ["test_file1.jpg", "test_file2.jpg"],
"file_url": ["http://localhost:9201/images/image1.jpg", "http://localhost:9201/images/image2.png"],
"subfolder": ["test_subfolder1", "test_subfolder2"]
})
self.IMG_DIR = "test_dir"
self.LOG_FILEPATH = "test_log_path.jsonl"
self.ERROR_LOG_FILEPATH = "test_error_log_path.jsonl"
self.DOWNSAMPLE_PATH = "test_downsample_dir"
self.DOWNSAMPLE_DIR = self.IMG_DIR + "_downsized"
self.DOWNSAMPLE_SIZE = 100

os.makedirs(self.IMG_DIR, exist_ok=True)
os.makedirs(self.DOWNSAMPLE_PATH, exist_ok=True)
os.makedirs(self.DOWNSAMPLE_DIR, exist_ok=True)
for subfolder in self.DUMMY_DATA["subfolder"]:
os.makedirs(os.path.join(self.DOWNSAMPLE_DIR, subfolder), exist_ok=True)

def tearDown(self):
shutil.rmtree(self.IMG_DIR, ignore_errors=True)
shutil.rmtree(self.DOWNSAMPLE_PATH, ignore_errors=True)
shutil.rmtree(self.DOWNSAMPLE_DIR, ignore_errors=True)
if os.path.exists(self.LOG_FILEPATH):
os.remove(self.LOG_FILEPATH)
if os.path.exists(self.ERROR_LOG_FILEPATH):
Expand All @@ -54,7 +78,22 @@ def test_successful_download(self, get_mock):
self.assertTrue(os.path.isfile(f"{self.IMG_DIR}/{filename}"))

@patch('requests.get')
def test_success_after_retries(self, get_mock):
def test_successful_download_with_subfolder(self, get_mock):
    # Every request is answered with a mocked 200 response carrying fake bytes.
    fake_response = MagicMock()
    fake_response.status_code = 200
    fake_response.raw = BytesIO(b"fake_image_data")
    get_mock.return_value = fake_response

    download_images(
        self.DUMMY_DATA,
        self.IMG_DIR,
        self.LOG_FILEPATH,
        self.ERROR_LOG_FILEPATH,
        subfolders="subfolder",
    )

    # Each file is expected at IMG_DIR/<subfolder>/<filename>.
    for subfolder, filename in zip(self.DUMMY_DATA['subfolder'], self.DUMMY_DATA['filename']):
        expected_file = f"{self.IMG_DIR}/{subfolder}/{filename}"
        self.assertTrue(os.path.isfile(expected_file))


@patch('requests.get')
@patch('time.sleep', return_value=None)
def test_success_after_retries(self,sleep_mock, get_mock):
retry_status_codes = [429, 500, 502, 503, 504]
for status_code in retry_status_codes:
with self.subTest(status_code=status_code):
Expand All @@ -63,39 +102,51 @@ def test_success_after_retries(self, get_mock):
mock_response_success = MagicMock()
mock_response_success.status_code = 200
mock_response_success.raw = BytesIO(b"fake_image_data")
get_mock.side_effect = [mock_response_retry] * 2 + [mock_response_success]
get_mock.side_effect = [
mock_response_retry, mock_response_retry, mock_response_success, # For test_file1.jpg
mock_response_retry, mock_response_retry, mock_response_success # For test_file2.jpg
]

download_images(self.DUMMY_DATA, self.IMG_DIR, self.LOG_FILEPATH, self.ERROR_LOG_FILEPATH, retry=3)

for filename in self.DUMMY_DATA['filename']:
self.assertTrue(os.path.isfile(f"{self.IMG_DIR}/{filename}"))
for filename in self.DUMMY_DATA['filename']:
self.assertTrue(os.path.isfile(f"{self.IMG_DIR}/{filename}"))

@patch('requests.get')
def test_failure_after_retries(self, get_mock):
@patch('time.sleep', return_value=None)
def test_failure_after_retries(self, sleep_mock,get_mock):
retry_status_codes = [429, 500, 502, 503, 504]
for status_code in retry_status_codes:
with self.subTest(status_code=status_code):
mock_response_retry = MagicMock()
mock_response_retry.status_code = status_code
get_mock.side_effect = [mock_response_retry] * 5
get_mock.side_effect = [
mock_response_retry, mock_response_retry, mock_response_retry, mock_response_retry, mock_response_retry, # For test_file1.jpg
mock_response_retry, mock_response_retry, mock_response_retry, mock_response_retry, mock_response_retry # For test_file2.jpg
]

download_images(self.DUMMY_DATA, self.IMG_DIR, self.LOG_FILEPATH, self.ERROR_LOG_FILEPATH, retry=5)

for filename in self.DUMMY_DATA['filename']:
self.assertFalse(os.path.isfile(f"{self.IMG_DIR}/{filename}"))

@patch('cautiousrobot.__main__.requests.get')
def test_downsampled_image_creation(self,get_mock):
mock_response = MagicMock()
mock_response.content = b'dummy_image_data'
get_mock.return_value = mock_response
for filename in self.DUMMY_DATA['filename']:
self.assertFalse(os.path.isfile(f"{self.IMG_DIR}/{filename}"))

def test_downsampled_image_creation(self):
download_images(self.DUMMY_DATA, self.IMG_DIR, self.LOG_FILEPATH, self.ERROR_LOG_FILEPATH,
downsample_path=self.DOWNSAMPLE_PATH, downsample=self.DOWNSAMPLE_SIZE)
downsample_path=self.DOWNSAMPLE_DIR, downsample=self.DOWNSAMPLE_SIZE)

for filename in self.DUMMY_DATA['filename']:
self.assertTrue(os.path.isfile(f"{self.DOWNSAMPLE_PATH}/test_subfolder1/{filename}") or
os.path.isfile(f"{self.DOWNSAMPLE_PATH}/test_subfolder2/{filename}"))
downsampled_path = os.path.join(self.DOWNSAMPLE_DIR, filename)
print(f"Checking existence of downsampled image: {downsampled_path}")
self.assertTrue(os.path.isfile(f"{self.DOWNSAMPLE_DIR}/{filename}") or
os.path.isfile(f"{self.DOWNSAMPLE_DIR}/{filename}"))

def test_downsampled_image_creation_with_subfolder(self):
    # Download with both downsampling and per-row subfolders enabled.
    download_images(
        self.DUMMY_DATA,
        self.IMG_DIR,
        self.LOG_FILEPATH,
        self.ERROR_LOG_FILEPATH,
        downsample_path=self.DOWNSAMPLE_DIR,
        downsample=self.DOWNSAMPLE_SIZE,
        subfolders="subfolder",
    )

    # Each downsampled file is expected at DOWNSAMPLE_DIR/<subfolder>/<filename>.
    for subfolder, filename in zip(self.DUMMY_DATA['subfolder'], self.DUMMY_DATA['filename']):
        expected_file = f"{self.DOWNSAMPLE_DIR}/{subfolder}/{filename}"
        self.assertTrue(os.path.isfile(expected_file))

@patch('requests.get')
def test_logging(self, get_mock):
Expand Down
Binary file added tests/testdata/images/image1.jpg
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the source of this (and the other) image?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I downloaded them from Google Images. If that is inappropriate, we can use other images.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the source of this (and the other) image?

Thank you! @egrace479 I have updated the testing images.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/testdata/images/image2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.