Skip to content

Commit 22d9fdc

Browse files
committed
fix: 修复旧word文档图片无法正常识别 #1533
1 parent af509a9 commit 22d9fdc

File tree

1 file changed

+7
-9
lines changed

1 file changed

+7
-9
lines changed

apps/common/handle/impl/doc_split_handle.py

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,9 @@
1414
from typing import List
1515

1616
from docx import Document, ImagePart
17+
from docx.oxml import ns
1718
from docx.table import Table
1819
from docx.text.paragraph import Paragraph
19-
from docx.oxml import ns
2020

2121
from common.handle.base_split_handle import BaseSplitHandle
2222
from common.util.split_model import SplitModel
@@ -33,11 +33,8 @@
3333
combine_nsmap = {**ns.nsmap, **old_docx_nsmap}
3434

3535

36-
def image_to_mode(image, doc: Document, images_list, get_image_id, is_new_docx=True):
37-
if is_new_docx:
38-
image_ids = image.xpath('.//a:blip/@r:embed')
39-
else:
40-
image_ids = image.xpath('.//v:imagedata/@r:id', namespaces=combine_nsmap)
36+
def image_to_mode(image, doc: Document, images_list, get_image_id):
37+
image_ids = image['get_image_id_handle'](image.get('image'))
4138
for img_id in image_ids: # 获取图片id
4239
part = doc.part.related_parts[img_id] # 根据图片id获取对应的图片
4340
if isinstance(part, ImagePart):
@@ -49,14 +46,15 @@ def image_to_mode(image, doc: Document, images_list, get_image_id, is_new_docx=T
4946

5047

5148
def get_paragraph_element_images(paragraph_element, doc: Document, images_list, get_image_id):
52-
images_xpath_list = [".//pic:pic", ".//w:pict"]
49+
images_xpath_list = [(".//pic:pic", lambda img: img.xpath('.//a:blip/@r:embed')),
50+
(".//w:pict", lambda img: img.xpath('.//v:imagedata/@r:id', namespaces=combine_nsmap))]
5351
images = []
54-
for images_xpath in images_xpath_list:
52+
for images_xpath, get_image_id_handle in images_xpath_list:
5553
try:
5654
_images = paragraph_element.xpath(images_xpath)
5755
if _images is not None and len(_images) > 0:
5856
for image in _images:
59-
images.append(image)
57+
images.append({'image': image, 'get_image_id_handle': get_image_id_handle})
6058
except Exception as e:
6159
pass
6260
return images

0 commit comments

Comments
 (0)