Skip to content

Commit cf78d78

Browse files
committed
Refactor main function to reduce cyclomatic complexity
- Break down complex main function (complexity 11) into smaller focused functions
- Add validate_and_setup() for prerequisite validation and setup
- Add process_directory() for directory processing logic
- Add process_single_file() for single file processing
- Add log_processing_results() for result logging
- Reduce main function complexity from 11 to 3 (well below max of 10)
- Improve code maintainability and testability
- Fix flake8 C901 complexity violation
1 parent 8e5f881 commit cf78d78

File tree

1 file changed

+87
-38
lines changed

1 file changed

+87
-38
lines changed

main.py

Lines changed: 87 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -251,65 +251,114 @@ def process_images_parallel(image_files, output_path, max_workers=None):
251251
return successful_files, failed_files, results
252252

253253

254-
def main(input_path, output_path, max_workers=None):
254+
def validate_and_setup(input_path, output_path):
255+
"""
256+
Validate prerequisites and setup output directory
257+
:param input_path: Input path to validate
258+
:param output_path: Output path to create if needed
259+
:return: True if validation passes, False otherwise
260+
"""
255261
# Check if tesseract is installed or not
256262
if not check_pre_requisites_tesseract():
257-
return
263+
return False
258264

259265
# Check if a valid input directory is given or not
260266
if not check_path(input_path):
261267
logging.error("Nothing found at `{}`".format(input_path))
262-
return
268+
return False
263269

264270
# Create output directory
265271
if output_path:
266272
create_directory(output_path)
267273
logging.debug("Creating Output Path {}".format(output_path))
268274

269-
# Check if input_path is directory or file
270-
if os.path.isdir(input_path):
271-
logging.debug("The Input Path is a directory.")
275+
return True
272276

273-
# Get valid image files efficiently
274-
image_files, other_files = get_valid_image_files(input_path)
275277

276-
if len(image_files) == 0:
277-
logging.error("No valid image files found at your input location")
278-
logging.error(
279-
"Supported formats: [{}]".format(", ".join(VALID_IMAGE_EXTENSIONS))
280-
)
281-
return
278+
def process_directory(input_path, output_path, max_workers):
279+
"""
280+
Process all images in a directory
281+
:param input_path: Directory containing images
282+
:param output_path: Output directory for text files
283+
:param max_workers: Number of parallel workers
284+
"""
285+
logging.debug("The Input Path is a directory.")
282286

283-
total_file_count = len(image_files) + other_files
284-
logging.info(
285-
"Found total {} file(s) ({} valid images, {} other files)\n".format(
286-
total_file_count, len(image_files), other_files
287-
)
287+
# Get valid image files efficiently
288+
image_files, other_files = get_valid_image_files(input_path)
289+
290+
if len(image_files) == 0:
291+
logging.error("No valid image files found at your input location")
292+
logging.error(
293+
"Supported formats: [{}]".format(", ".join(VALID_IMAGE_EXTENSIONS))
288294
)
295+
return
289296

290-
# Process images in parallel
291-
successful_files, failed_files, results = process_images_parallel(image_files, output_path, max_workers)
297+
total_file_count = len(image_files) + other_files
298+
logging.info(
299+
"Found total {} file(s) ({} valid images, {} other files)\n".format(
300+
total_file_count, len(image_files), other_files
301+
)
302+
)
292303

293-
# Print results if not writing to files
294-
if not output_path:
295-
for filename, text in results:
296-
print(f"\n=== {filename} ===")
297-
print(text)
304+
# Process images in parallel
305+
successful_files, failed_files, results = process_images_parallel(image_files, output_path, max_workers)
306+
307+
# Print results if not writing to files
308+
if not output_path:
309+
for filename, text in results:
310+
print(f"\n=== {filename} ===")
311+
print(text)
312+
313+
# Log final results
314+
log_processing_results(successful_files, failed_files, other_files)
315+
316+
317+
def process_single_file(input_path, output_path):
318+
"""
319+
Process a single image file
320+
:param input_path: Path to the image file
321+
:param output_path: Output directory for text file
322+
"""
323+
filename = os.path.basename(input_path)
324+
logging.debug("The Input Path is a file {}".format(filename))
325+
image_path = Path(input_path)
326+
success, text, _ = run_tesseract_optimized(image_path, output_path)
327+
if success and text:
328+
print(text)
298329

299-
logging.info("Parsing Completed!\n")
300-
logging.info("Successfully parsed images: {}".format(successful_files))
301-
if failed_files > 0:
302-
logging.warning("Failed to parse images: {}".format(failed_files))
303-
if other_files > 0:
304-
logging.info("Files with unsupported file extensions: {}".format(other_files))
305330

331+
def log_processing_results(successful_files, failed_files, other_files):
332+
"""
333+
Log the results of image processing
334+
:param successful_files: Number of successfully processed files
335+
:param failed_files: Number of failed files
336+
:param other_files: Number of non-image files
337+
"""
338+
logging.info("Parsing Completed!\n")
339+
logging.info("Successfully parsed images: {}".format(successful_files))
340+
if failed_files > 0:
341+
logging.warning("Failed to parse images: {}".format(failed_files))
342+
if other_files > 0:
343+
logging.info("Files with unsupported file extensions: {}".format(other_files))
344+
345+
346+
def main(input_path, output_path, max_workers=None):
347+
"""
348+
Main function to process images and extract text using OCR
349+
:param input_path: Path to input file or directory
350+
:param output_path: Path to output directory
351+
:param max_workers: Number of parallel workers
352+
"""
353+
# Validate prerequisites and setup
354+
if not validate_and_setup(input_path, output_path):
355+
return
356+
357+
# Process based on input type
358+
if os.path.isdir(input_path):
359+
process_directory(input_path, output_path, max_workers)
306360
else:
307-
filename = os.path.basename(input_path)
308-
logging.debug("The Input Path is a file {}".format(filename))
309-
image_path = Path(input_path)
310-
success, text, _ = run_tesseract_optimized(image_path, output_path)
311-
if success and text:
312-
print(text)
361+
process_single_file(input_path, output_path)
313362

314363

315364
if __name__ == "__main__":

0 commit comments

Comments
 (0)