Fix docs and improve visualization (#422)

SamitHuang · web-flow · commit b42c4e5bd2c3 · 2023-06-16T18:01:48.000+08:00
diff --git a/README.md b/README.md
@@ -11,13 +11,13 @@
 
 English | [中文](README_CN.md)
 
-[Introduction](#introduction) |
-[Installation](#installation) |
-[Quick Start](#quick-start) |
-[Tutorials](#tutorials) |
-[Model List](#model-list) |
-[Dataset List](#dataset-list) |
-[Notes](#notes)
+[📝Introduction](#introduction) |
+[🔨Installation](#installation) |
+[🚀Quick Start](#quick-start) |
+[📚Tutorials](#tutorials) |
+[🎁Model List](#model-list) |
+[📰Dataset List](#dataset-list) |
+[🎉Notes](#notes)
 
 </div>
 
diff --git a/README_CN.md b/README_CN.md
@@ -11,13 +11,13 @@
 
 [English](README.md) | 中文
 
-[简介](#简介) |
-[安装教程](#安装教程) |
-[快速开始](#快速开始) |
-[使用教程](#使用教程) |
-[模型列表](#模型列表) |
-[数据集列表](#数据集列表) |
-[更新日志](#更新日志)
+[📝简介](#简介) |
+[🔨安装教程](#安装教程) |
+[🚀快速开始](#快速开始) |
+[📚使用教程](#使用教程) |
+[🎁模型列表](#模型列表) |
+[📰数据集列表](#数据集列表) |
+[🎉更新日志](#更新日志)
 
 </div>
 
diff --git a/mindocr/utils/visualize.py b/mindocr/utils/visualize.py
@@ -1,3 +1,4 @@
+import math
 from typing import List, Union
 
 import cv2
@@ -66,8 +67,9 @@ def draw_boxes(
     image: Union[str, np.array],
     bboxes: Union[list, np.array],
     color: Union[tuple, str] = (255, 0, 0),
-    thickness=2,
+    thickness=1,
     is_bgr_img=False,
+    color_random=False,
 ):  # , to_rgb=False):
     """image can be str or np.array for image in 'BGR' colorm mode.
     color: list for color of each box, or tuple for color of all boxes with the same color. in RGB order
@@ -132,15 +134,18 @@ def _get_draw_point_and_font_size(box, font_size="auto", text_inside_box=True, i
         pt_sums = np.array(box).sum(axis=1)
         corner = box[np.argmin(pt_sums)]
 
-        box_h = box[:, 1].max() - box[:, 1].min()
-        box_w = box[:, 0].max() - box[:, 0].min()
+        # box_h = box[:, 1].max() - box[:, 1].min()
+        # box_w = box[:, 0].max() - box[:, 0].min()
+
+        box_h = int(math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2))
+        box_w = int(math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2))
 
         # print(font_size)
         # TODO: consider the height and witdh of the text
         if text_inside_box:
             draw_point_w = corner[0] + box_w * 0.1
             draw_point_h = corner[1] - box_h * 0.05
-            font_size = round(box_h * 0.8) if not isinstance(font_size, int) else font_size
+            font_size = round(box_h * 0.9) if not isinstance(font_size, int) else font_size
         else:
             if isinstance(font_size, int) or isinstance(font_size, float):
                 font_size = font_size
@@ -154,14 +159,20 @@ def _get_draw_point_and_font_size(box, font_size="auto", text_inside_box=True, i
 
     for i, text in enumerate(texts):
         # draw text on the most left-top point
-        draw_point, fs = _get_draw_point_and_font_size(
-            bboxes[i], font_size, text_inside_box=text_inside_box, img_h=img_h
-        )
+        box = bboxes[i]
+        draw_point, fs = _get_draw_point_and_font_size(box, font_size, text_inside_box=text_inside_box, img_h=img_h)
 
         # TODO: use other lib which can set font size dynamically after font loading
-        ttf = ImageFont.load_default() if not font_path else ImageFont.truetype(font_path, fs)
+        font = ImageFont.load_default() if not font_path else ImageFont.truetype(font_path, fs, encoding="utf-8")
+
+        font_width = font.getsize(text)[0]
+        box_width = int(math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2))
+        if font_width > box_width:
+            font_size = int(fs * box_width / font_width)
+            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+
         # refine the draw starting
-        img_draw.text(draw_point, text, font=ttf, fill=text_color)
+        img_draw.text(draw_point, text, font=font, fill=text_color)
 
     return np.array(pimg)
 
diff --git a/tools/infer/text/README.md b/tools/infer/text/README.md
@@ -14,6 +14,7 @@ Thus, online inference is more suitable for demonstration and to visually evalua
 | Python      | >=3.7   |
 
 Supported platforms: Linux, MacOS, Windows (Not tested)
+
 Supported devices: CPU, GPU, and Ascend.
 
 Please clone MindOCR at first
@@ -173,6 +174,7 @@ python tools/infer/text/predict_system.py --image_dir {path_to_img or dir_to_img
                                           --det_algorithm DB++  \
                                           --rec_algorithm CRNN
 ```
+> Note: set `--visualize_output True` if you want to visualize the detection and recognition results on the input image.
 
 After running, the inference results will be saved in `{args.draw_img_save_dir}/system_results.txt`,  where `--draw_img_save_dir` is the directory for saving  results and is set to `./inference_results` by default. Here are some results for examples.