Skip to content

Add more test cases for cautious-robot #20

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
4f7b4f6
Add more test cases for cautious-robot
zoeduan Jul 24, 2024
7aeeb31
Update tests/test_buddycheck.py
zoeduan Jul 26, 2024
f76bca7
Update tests/test_download_images.py
zoeduan Jul 26, 2024
be63bdd
update the test_buddycheck.py: 1. add another buddycheck instance wit…
zoeduan Jul 26, 2024
cbb2696
Merge branch 'feature/enhance-testing' of github.com:Imageomics/cauti…
zoeduan Jul 26, 2024
ac351d0
Merge remote-tracking branch 'origin/main' into feature/enhance-testing
zoeduan Jul 26, 2024
84e3db2
update test_logging
zoeduan Jul 26, 2024
8ede4e4
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
b182c43
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
705c3a7
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
8b803ab
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
6e9a6f9
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
ced10d5
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
66e9328
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
e01a1e6
Update tests/test_buddycheck.py
zoeduan Jul 27, 2024
36f2a08
remove test_check_alignment_case_insensitive_columns
zoeduan Jul 27, 2024
c527cec
Replace the retry template with a loop over retry codes to handle bot…
zoeduan Jul 27, 2024
279ea43
Update test_downsampled_image_creation
zoeduan Jul 28, 2024
4947850
Update tests/test_buddycheck.py
zoeduan Jul 29, 2024
1c7b535
Update tests/test_buddycheck.py
zoeduan Jul 29, 2024
e92f93a
1. Update test_success_after_retries and test_failure_after_retries 2…
zoeduan Jul 29, 2024
2a31f5c
fix downsize read bug
egrace479 Jul 29, 2024
463d147
Update test_downsampled_image_creation(_with_subfolder)
zoeduan Jul 29, 2024
fd9f8b2
Updated test_download_images.py to make it run faster
zoeduan Jul 30, 2024
d752b8d
Update tests/test_download_images.py
zoeduan Jul 30, 2024
16bda3a
Update tests/test_download_images.py
zoeduan Jul 30, 2024
90237f1
"Add sample images from Rare-Species dataset (8511cb36-ea18-419a-b938…
zoeduan Jul 30, 2024
df58073
Custom error when validating empty input df
thompsonmj Jul 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions src/cautiousrobot/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,17 @@ def download_images(data, img_dir, log_filepath, error_log_filepath, filename =
if os.path.exists(downsample_dir_path) != True:
os.makedirs(downsample_dir_path, exist_ok=False)
# Downsample & save image
byte_data = io.BytesIO(response.content)
img = Image.open(byte_data)
#img.save(dest_path)
img.resize((downsample, downsample)).save(downsample_dir_path + "/" + image_name)
try:
img = Image.open(f"{image_dir_path}/{image_name}")
img.resize((downsample, downsample)).save(downsample_dir_path + "/" + image_name)
except Exception as e:
print(e)
log_errors = log_response(log_errors,
index = i,
image = "downsized_" + image_name,
url = url,
response_code = str(e))
update_log(log = log_errors, index = i, filepath = error_log_filepath)

# check for too many requests
elif response.status_code in REDO_CODE_LIST:
Expand Down
149 changes: 149 additions & 0 deletions tests/test_buddycheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import unittest
from unittest.mock import patch
import pandas as pd
import tempfile
import os
from cautiousrobot import BuddyCheck
from cautiousrobot.utils import process_csv

class TestBuddyCheck(unittest.TestCase):
    """Unit tests for ``BuddyCheck`` merging, alignment, and validation.

    Each test works from two small CSV fixtures written to temporary files:
    a "source" table (``filename``, ``checksum``) and a "checksum" table
    (``filename``, ``md5``) holding the same three rows.
    """

    def setUp(self):
        """Create the ``BuddyCheck`` instances and the temporary CSV fixtures."""
        # Default instance: buddy_id is None and buddy_col is 'md5'
        # (see test_initialization).
        self.buddy_check = BuddyCheck()
        # Instance that also matches on the 'filename' column.
        self.buddy_check_filename = BuddyCheck(buddy_id='filename')
        # Instance with both the ID column and checksum column overridden.
        self.buddy_check_id_col = BuddyCheck(buddy_id="filename", buddy_col="sha256")

        # delete=False keeps the files on disk after they are closed so that
        # pandas can reopen them by name; tearDown removes them. The context
        # managers guarantee the handles are closed even if a write fails.
        with tempfile.NamedTemporaryFile(delete=False, mode='w') as img_file:
            self.img_source_file = img_file
            img_file.write("""filename,checksum
image1.jpg,abc123
image2.jpg,def456
image3.jpg,ghi789
""")

        with tempfile.NamedTemporaryFile(delete=False, mode='w') as checksum_file:
            self.checksum_source_file = checksum_file
            checksum_file.write("""filename,md5
image1.jpg,abc123
image2.jpg,def456
image3.jpg,ghi789
""")

    def tearDown(self):
        """Delete the temporary CSV fixtures created in ``setUp``."""
        os.remove(self.img_source_file.name)
        os.remove(self.checksum_source_file.name)

    def test_initialization(self):
        """Constructor arguments (and their defaults) land on the attributes."""
        self.assertIsNone(self.buddy_check.buddy_id)
        self.assertEqual(self.buddy_check.buddy_col, 'md5')
        # Overriding only buddy_id must leave buddy_col at its default.
        self.assertEqual(self.buddy_check_filename.buddy_id, 'filename')
        self.assertEqual(self.buddy_check_filename.buddy_col, 'md5')
        self.assertEqual(self.buddy_check_id_col.buddy_id, 'filename')
        self.assertEqual(self.buddy_check_id_col.buddy_col, 'sha256')

    def test_merge_on_checksum(self):
        """Merging on checksum only keeps both filename columns (suffixed)."""
        source_df = pd.read_csv(self.img_source_file.name)
        checksum_df = pd.read_csv(self.checksum_source_file.name)

        merged_df = self.buddy_check_filename.merge_on_checksum(source_df, checksum_df, 'checksum')
        expected_df = pd.DataFrame({
            'filename_x': ['image1.jpg', 'image2.jpg', 'image3.jpg'],
            'checksum': ['abc123', 'def456', 'ghi789'],
            'filename_y': ['image1.jpg', 'image2.jpg', 'image3.jpg'],
            'md5': ['abc123', 'def456', 'ghi789'],
        })
        pd.testing.assert_frame_equal(merged_df, expected_df)

    def test_merge_on_filename_checksum(self):
        """Merging on filename and checksum yields a single filename column."""
        source_df = pd.read_csv(self.img_source_file.name)
        checksum_df = pd.read_csv(self.checksum_source_file.name)
        merged_df = self.buddy_check_filename.merge_on_filename_checksum(
            source_df, checksum_df, 'filename', 'checksum'
        )
        expected_df = pd.DataFrame({
            'filename': ['image1.jpg', 'image2.jpg', 'image3.jpg'],
            'checksum': ['abc123', 'def456', 'ghi789'],
            'md5': ['abc123', 'def456', 'ghi789'],
        })
        pd.testing.assert_frame_equal(merged_df, expected_df)

    def test_check_alignment_all_matching(self):
        """``check_alignment`` returns None when every source row was merged."""
        source_df = pd.read_csv(self.img_source_file.name)
        checksum_df = pd.read_csv(self.checksum_source_file.name)
        merged_df = self.buddy_check_filename.merge_on_filename_checksum(
            source_df, checksum_df, 'filename', 'checksum'
        )
        missing_imgs = self.buddy_check_filename.check_alignment(source_df, merged_df)
        self.assertIsNone(missing_imgs)

    def test_check_alignment_some_missing(self):
        """``check_alignment`` returns the source rows absent from the merge."""
        source_df = pd.DataFrame({
            'filename': ['image1.jpg', 'image2.jpg', 'image3.jpg', 'image4.jpg'],
            'checksum': ['abc123', 'def456', 'ghi789', 'jkl012'],
        })
        checksum_df = pd.read_csv(self.checksum_source_file.name)
        merged_df = self.buddy_check_filename.merge_on_filename_checksum(
            source_df, checksum_df, 'filename', 'checksum'
        )
        missing_imgs = self.buddy_check_filename.check_alignment(source_df, merged_df)
        expected_missing_imgs = pd.DataFrame({
            'filename': ['image4.jpg'],
            'checksum': ['jkl012'],
        })
        # The missing rows keep their original index; reset it before comparing.
        pd.testing.assert_frame_equal(missing_imgs.reset_index(drop=True), expected_missing_imgs)

    def test_validate_download_success(self):
        """``validate_download`` returns None when all images are accounted for."""
        missing_imgs = self.buddy_check.validate_download(
            source_df=pd.read_csv(self.img_source_file.name),
            checksum_df=pd.read_csv(self.checksum_source_file.name),
            source_id_col="filename",
            source_validation_col="checksum",
        )
        self.assertIsNone(missing_imgs)

    def test_validate_download_missing_images(self):
        """``validate_download`` returns the rows that have no checksum match."""
        source_df = pd.DataFrame({
            'filename': ['image1.jpg', 'image2.jpg', 'image3.jpg', 'image4.jpg'],
            'checksum': ['abc123', 'def456', 'ghi789', 'jkl012'],
        })
        checksum_df = pd.read_csv(self.checksum_source_file.name)
        missing_imgs = self.buddy_check_filename.validate_download(
            source_df=source_df,
            checksum_df=checksum_df,
            source_id_col="filename",
            source_validation_col="checksum",
        )
        expected_missing_imgs = pd.DataFrame({
            'filename': ['image4.jpg'],
            'checksum': ['jkl012'],
        })
        # The missing rows keep their original index; reset it before comparing.
        pd.testing.assert_frame_equal(missing_imgs.reset_index(drop=True), expected_missing_imgs)

    def test_check_alignment_no_matching(self):
        """All three source rows are reported when nothing matches."""
        source_df = pd.read_csv(self.img_source_file.name)
        checksum_df = pd.DataFrame({
            'filename': ['image4.jpg', 'image5.jpg', 'image6.jpg'],
            'md5': ['xyz123', 'uvw456', 'rst789'],
        })
        merged_df = self.buddy_check_filename.merge_on_filename_checksum(
            source_df, checksum_df, 'filename', 'checksum'
        )
        missing_imgs = self.buddy_check_filename.check_alignment(source_df, merged_df)
        self.assertIsNotNone(missing_imgs)
        self.assertEqual(missing_imgs.shape[0], 3)

    def test_check_alignment_checksums_only(self):
        """Alignment succeeds for the default instance merging on checksum only."""
        source_df = pd.read_csv(self.img_source_file.name)
        checksum_df = pd.read_csv(self.checksum_source_file.name)
        merged_df = self.buddy_check.merge_on_checksum(source_df, checksum_df, 'checksum')
        missing_imgs = self.buddy_check.check_alignment(source_df, merged_df)
        self.assertIsNone(missing_imgs)

    def test_check_alignment_empty_img_df(self):
        """An empty source table has nothing missing, so None is expected."""
        source_df = pd.DataFrame(columns=['filename', 'checksum'])
        checksum_df = pd.read_csv(self.checksum_source_file.name)
        # NOTE(review): this uses the default instance (buddy_id is None) for a
        # filename-based merge, unlike the other tests which use
        # buddy_check_filename -- confirm this is intentional.
        merged_df = self.buddy_check.merge_on_filename_checksum(
            source_df, checksum_df, 'filename', 'checksum'
        )
        missing_imgs = self.buddy_check.check_alignment(source_df, merged_df)
        self.assertIsNone(missing_imgs)

    def test_check_alignment_empty_checksum_df(self):
        """With an empty checksum table every source row is reported missing."""
        source_df = pd.read_csv(self.img_source_file.name)
        checksum_df = pd.DataFrame(columns=['filename', 'md5'])
        merged_df = self.buddy_check_filename.merge_on_filename_checksum(
            source_df, checksum_df, 'filename', 'checksum'
        )
        # NOTE(review): the merge above uses buddy_check_filename but alignment
        # is checked with the default buddy_check instance -- confirm the mix
        # is intentional.
        missing_imgs = self.buddy_check.check_alignment(source_df, merged_df)
        self.assertIsNotNone(missing_imgs)
        self.assertEqual(missing_imgs.shape[0], 3)

# Run the suite when this file is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()
Loading