14
14
from typing import List
15
15
16
16
from docx import Document , ImagePart
17
+ from docx .oxml import ns
17
18
from docx .table import Table
18
19
from docx .text .paragraph import Paragraph
19
- from docx .oxml import ns
20
20
21
21
from common .handle .base_split_handle import BaseSplitHandle
22
22
from common .util .split_model import SplitModel
33
33
combine_nsmap = {** ns .nsmap , ** old_docx_nsmap }
34
34
35
35
36
- def image_to_mode (image , doc : Document , images_list , get_image_id , is_new_docx = True ):
37
- if is_new_docx :
38
- image_ids = image .xpath ('.//a:blip/@r:embed' )
39
- else :
40
- image_ids = image .xpath ('.//v:imagedata/@r:id' , namespaces = combine_nsmap )
36
+ def image_to_mode (image , doc : Document , images_list , get_image_id ):
37
+ image_ids = image ['get_image_id_handle' ](image .get ('image' ))
41
38
for img_id in image_ids : # 获取图片id
42
39
part = doc .part .related_parts [img_id ] # 根据图片id获取对应的图片
43
40
if isinstance (part , ImagePart ):
@@ -49,14 +46,15 @@ def image_to_mode(image, doc: Document, images_list, get_image_id, is_new_docx=T
49
46
50
47
51
48
def get_paragraph_element_images(paragraph_element, doc: Document, images_list, get_image_id):
    """Collect the image elements found below ``paragraph_element``.

    Two kinds of image markup are searched: ``pic:pic`` elements and
    ``w:pict`` elements. Every match is returned together with the callable
    that extracts the relationship ids from that kind of element, so the
    consumer does not need to know which markup produced the match.

    :param paragraph_element: element exposing an ``xpath(expression)`` method
    :param doc: not used by this function; kept for signature compatibility
    :param images_list: not used by this function; kept for signature compatibility
    :param get_image_id: not used by this function; kept for signature compatibility
    :return: list of dicts shaped like
        ``{'image': <element>, 'get_image_id_handle': <callable>}``
    """
    images_xpath_list = [
        (".//pic:pic", lambda img: img.xpath('.//a:blip/@r:embed')),
        (".//w:pict", lambda img: img.xpath('.//v:imagedata/@r:id', namespaces=combine_nsmap)),
    ]
    images = []
    for images_xpath, get_image_id_handle in images_xpath_list:
        try:
            _images = paragraph_element.xpath(images_xpath)
            if _images:
                for image in _images:
                    # The key must be exactly 'image': image_to_mode looks the
                    # element up with image.get('image').
                    images.append({'image': image, 'get_image_id_handle': get_image_id_handle})
        except Exception:
            # Best effort: an expression that cannot be evaluated for this
            # element is skipped so the remaining markup kinds are still tried.
            continue
    return images
0 commit comments